git.karo-electronics.de Git - mv-sheeva.git/blobdiff - Documentation/perf_counter/builtin-record.c
perf_counter tools: Sample and display frequency adjustment changes
[mv-sheeva.git] / Documentation / perf_counter / builtin-record.c
index 96bfb7c5f1e4fd67b7c94c84823f448642cf6318..43ddab31ac399c894e27fdf3bb057ad7f7d3ed55 100644 (file)
@@ -1,23 +1,33 @@
-
+/*
+ * builtin-record.c
+ *
+ * Builtin record command: Record the profile of a workload
+ * (or a CPU, or a PID) into the perf.data output file - for
+ * later analysis via perf report.
+ */
+#include "builtin.h"
 
 #include "perf.h"
-#include "builtin.h"
+
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/string.h"
 
+#include <unistd.h>
 #include <sched.h>
 
 #define ALIGN(x, a)            __ALIGN_MASK(x, (typeof(x))(a)-1)
 #define __ALIGN_MASK(x, mask)  (((x)+(mask))&~(mask))
 
-static int                     default_interval = 100000;
-static int                     event_count[MAX_COUNTERS];
+static long                    default_interval = 100000;
+static long                    event_count[MAX_COUNTERS];
 
 static int                     fd[MAX_NR_CPUS][MAX_COUNTERS];
-static int                     nr_cpus                         =  0;
+static int                     nr_cpus                         = 0;
 static unsigned int            page_size;
-static unsigned int            mmap_pages                      = 16;
+static unsigned int            mmap_pages                      = 128;
+static int                     freq                            = 0;
 static int                     output;
 static const char              *output_name                    = "perf.data";
 static int                     group                           = 0;
@@ -25,7 +35,8 @@ static unsigned int           realtime_prio                   = 0;
 static int                     system_wide                     = 0;
 static pid_t                   target_pid                      = -1;
 static int                     inherit                         = 1;
-static int                     nmi                             = 1;
+static int                     force                           = 0;
+static int                     append_file                     = 0;
 
 const unsigned int default_count[] = {
        1000000,
@@ -54,9 +65,11 @@ static unsigned int mmap_read_head(struct mmap_data *md)
        return head;
 }
 
-static long events;
+static long samples;
 static struct timeval last_read, this_read;
 
+static __u64 bytes_written;
+
 static void mmap_read(struct mmap_data *md)
 {
        unsigned int head = mmap_read_head(md);
@@ -70,7 +83,7 @@ static void mmap_read(struct mmap_data *md)
 
        /*
         * If we're further behind than half the buffer, there's a chance
-        * the writer will bite our tail and screw up the events under us.
+        * the writer will bite our tail and mess up the samples under us.
         *
         * If we somehow ended up ahead of the head, we got messed up.
         *
@@ -96,7 +109,7 @@ static void mmap_read(struct mmap_data *md)
        last_read = this_read;
 
        if (old != head)
-               events++;
+               samples++;
 
        size = head - old;
 
@@ -104,28 +117,34 @@ static void mmap_read(struct mmap_data *md)
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;
+
                while (size) {
                        int ret = write(output, buf, size);
-                       if (ret < 0) {
-                               perror("failed to write");
-                               exit(-1);
-                       }
+
+                       if (ret < 0)
+                               die("failed to write");
+
                        size -= ret;
                        buf += ret;
+
+                       bytes_written += ret;
                }
        }
 
        buf = &data[old & md->mask];
        size = head - old;
        old += size;
+
        while (size) {
                int ret = write(output, buf, size);
-               if (ret < 0) {
-                       perror("failed to write");
-                       exit(-1);
-               }
+
+               if (ret < 0)
+                       die("failed to write");
+
                size -= ret;
                buf += ret;
+
+               bytes_written += ret;
        }
 
        md->prev = old;
@@ -161,16 +180,16 @@ struct comm_event {
        char                            comm[16];
 };
 
-static pid_t pid_synthesize_comm_event(pid_t pid)
+static void pid_synthesize_comm_event(pid_t pid, int full)
 {
        struct comm_event comm_ev;
        char filename[PATH_MAX];
-       pid_t spid, ppid;
        char bf[BUFSIZ];
-       int fd, nr, ret;
-       char comm[18];
+       int fd, ret;
        size_t size;
-       char state;
+       char *field, *sep;
+       DIR *tasks;
+       struct dirent dirent, *next;
 
        snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
 
@@ -185,32 +204,60 @@ static pid_t pid_synthesize_comm_event(pid_t pid)
        }
        close(fd);
 
+       /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
        memset(&comm_ev, 0, sizeof(comm_ev));
-        nr = sscanf(bf, "%d %s %c %d %d ",
-                       &spid, comm, &state, &ppid, &comm_ev.pid);
-       if (nr != 5) {
-               fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
-                       filename);
-               exit(EXIT_FAILURE);
-       }
+       field = strchr(bf, '(');
+       if (field == NULL)
+               goto out_failure;
+       sep = strchr(++field, ')');
+       if (sep == NULL)
+               goto out_failure;
+       size = sep - field;
+       memcpy(comm_ev.comm, field, size++);
+
+       comm_ev.pid = pid;
        comm_ev.header.type = PERF_EVENT_COMM;
-       comm_ev.tid = pid;
-       size = strlen(comm);
-       comm[--size] = '\0'; /* Remove the ')' at the end */
-       --size; /* Remove the '(' at the begin */
-       memcpy(comm_ev.comm, comm + 1, size);
        size = ALIGN(size, sizeof(uint64_t));
        comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
 
-       ret = write(output, &comm_ev, comm_ev.header.size);
-       if (ret < 0) {
-               perror("failed to write");
-               exit(-1);
+       if (!full) {
+               comm_ev.tid = pid;
+
+               ret = write(output, &comm_ev, comm_ev.header.size);
+               if (ret < 0) {
+                       perror("failed to write");
+                       exit(-1);
+               }
+               return;
        }
-       return comm_ev.pid;
+
+       snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
+
+       tasks = opendir(filename);
+       while (!readdir_r(tasks, &dirent, &next) && next) {
+               char *end;
+               pid = strtol(dirent.d_name, &end, 10);
+               if (*end)
+                       continue;
+
+               comm_ev.tid = pid;
+
+               ret = write(output, &comm_ev, comm_ev.header.size);
+               if (ret < 0) {
+                       perror("failed to write");
+                       exit(-1);
+               }
+       }
+       closedir(tasks);
+       return;
+
+out_failure:
+       fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
+               filename);
+       exit(EXIT_FAILURE);
 }
 
-static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
+static void pid_synthesize_mmap_samples(pid_t pid)
 {
        char filename[PATH_MAX];
        FILE *fp;
@@ -223,23 +270,25 @@ static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
                exit(EXIT_FAILURE);
        }
        while (1) {
-               char bf[BUFSIZ];
-               unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
+               char bf[BUFSIZ], *pbf = bf;
                struct mmap_event mmap_ev = {
                        .header.type = PERF_EVENT_MMAP,
                };
-               unsigned long ino;
-               int major, minor;
+               int n;
                size_t size;
                if (fgets(bf, sizeof(bf), fp) == NULL)
                        break;
 
                /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
-                       &mmap_ev.start, &mmap_ev.len,
-                        &vm_read, &vm_write, &vm_exec, &vm_mayshare,
-                        &mmap_ev.pgoff, &major, &minor, &ino);
-               if (vm_exec == 'x') {
+               n = hex2u64(pbf, &mmap_ev.start);
+               if (n < 0)
+                       continue;
+               pbf += n + 1;
+               n = hex2u64(pbf, &mmap_ev.len);
+               if (n < 0)
+                       continue;
+               pbf += n + 3;
+               if (*pbf == 'x') { /* vm_exec */
                        char *execname = strrchr(bf, ' ');
 
                        if (execname == NULL || execname[1] != '/')
@@ -253,7 +302,7 @@ static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
                        mmap_ev.len -= mmap_ev.start;
                        mmap_ev.header.size = (sizeof(mmap_ev) -
                                               (sizeof(mmap_ev.filename) - size));
-                       mmap_ev.pid = pgid;
+                       mmap_ev.pid = pid;
                        mmap_ev.tid = pid;
 
                        if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
@@ -266,73 +315,103 @@ static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
        fclose(fp);
 }
 
-static void open_counters(int cpu, pid_t pid)
+static void synthesize_samples(void)
 {
-       struct perf_counter_hw_event hw_event;
-       int counter, group_fd;
+       DIR *proc;
+       struct dirent dirent, *next;
+
+       proc = opendir("/proc");
+
+       while (!readdir_r(proc, &dirent, &next) && next) {
+               char *end;
+               pid_t pid;
+
+               pid = strtol(dirent.d_name, &end, 10);
+               if (*end) /* only interested in proper numerical dirents */
+                       continue;
+
+               pid_synthesize_comm_event(pid, 1);
+               pid_synthesize_mmap_samples(pid);
+       }
+
+       closedir(proc);
+}
+
+static int group_fd;
+
+static void create_counter(int counter, int cpu, pid_t pid)
+{
+       struct perf_counter_attr attr;
        int track = 1;
 
-       if (pid > 0) {
-               pid_t pgid = pid_synthesize_comm_event(pid);
-               pid_synthesize_mmap_events(pid, pgid);
+       memset(&attr, 0, sizeof(attr));
+       attr.config             = event_id[counter];
+       attr.sample_period      = event_count[counter];
+       attr.sample_type        = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
+       attr.freq               = freq;
+       attr.mmap               = track;
+       attr.comm               = track;
+       attr.inherit            = (cpu < 0) && inherit;
+
+       track = 0; /* only the first counter needs these */
+
+       fd[nr_cpu][counter] = sys_perf_counter_open(&attr, pid, cpu, group_fd, 0);
+
+       if (fd[nr_cpu][counter] < 0) {
+               int err = errno;
+
+               error("syscall returned with %d (%s)\n",
+                               fd[nr_cpu][counter], strerror(err));
+               if (err == EPERM)
+                       printf("Are you root?\n");
+               exit(-1);
        }
+       assert(fd[nr_cpu][counter] >= 0);
+       fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
 
-       group_fd = -1;
-       for (counter = 0; counter < nr_counters; counter++) {
+       /*
+        * First counter acts as the group leader:
+        */
+       if (group && group_fd == -1)
+               group_fd = fd[nr_cpu][counter];
+
+       event_array[nr_poll].fd = fd[nr_cpu][counter];
+       event_array[nr_poll].events = POLLIN;
+       nr_poll++;
+
+       mmap_array[nr_cpu][counter].counter = counter;
+       mmap_array[nr_cpu][counter].prev = 0;
+       mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
+       mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+                       PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
+       if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
+               error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+               exit(-1);
+       }
+}
 
-               memset(&hw_event, 0, sizeof(hw_event));
-               hw_event.config         = event_id[counter];
-               hw_event.irq_period     = event_count[counter];
-               hw_event.record_type    = PERF_RECORD_IP | PERF_RECORD_TID;
-               hw_event.nmi            = nmi;
-               hw_event.mmap           = track;
-               hw_event.comm           = track;
-               hw_event.inherit        = (cpu < 0) && inherit;
-
-               track = 0; // only the first counter needs these
-
-               fd[nr_cpu][counter] =
-                       sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
-
-               if (fd[nr_cpu][counter] < 0) {
-                       int err = errno;
-                       printf("kerneltop error: syscall returned with %d (%s)\n",
-                                       fd[nr_cpu][counter], strerror(err));
-                       if (err == EPERM)
-                               printf("Are you root?\n");
-                       exit(-1);
-               }
-               assert(fd[nr_cpu][counter] >= 0);
-               fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
+static void open_counters(int cpu, pid_t pid)
+{
+       int counter;
 
-               /*
-                * First counter acts as the group leader:
-                */
-               if (group && group_fd == -1)
-                       group_fd = fd[nr_cpu][counter];
-
-               event_array[nr_poll].fd = fd[nr_cpu][counter];
-               event_array[nr_poll].events = POLLIN;
-               nr_poll++;
-
-               mmap_array[nr_cpu][counter].counter = counter;
-               mmap_array[nr_cpu][counter].prev = 0;
-               mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
-               mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-                               PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
-               if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
-                       printf("kerneltop error: failed to mmap with %d (%s)\n",
-                                       errno, strerror(errno));
-                       exit(-1);
-               }
+       if (pid > 0) {
+               pid_synthesize_comm_event(pid, 0);
+               pid_synthesize_mmap_samples(pid);
        }
+
+       group_fd = -1;
+       for (counter = 0; counter < nr_counters; counter++)
+               create_counter(counter, cpu, pid);
+
        nr_cpu++;
 }
 
 static int __cmd_record(int argc, const char **argv)
 {
        int i, counter;
+       struct stat st;
        pid_t pid;
+       int flags;
        int ret;
 
        page_size = sysconf(_SC_PAGE_SIZE);
@@ -340,14 +419,26 @@ static int __cmd_record(int argc, const char **argv)
        assert(nr_cpus <= MAX_NR_CPUS);
        assert(nr_cpus >= 0);
 
-       output = open(output_name, O_CREAT|O_EXCL|O_RDWR, S_IRWXU);
+       if (!stat(output_name, &st) && !force && !append_file) {
+               fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
+                               output_name);
+               exit(-1);
+       }
+
+       flags = O_CREAT|O_RDWR;
+       if (append_file)
+               flags |= O_APPEND;
+       else
+               flags |= O_TRUNC;
+
+       output = open(output_name, flags, S_IRUSR|S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                exit(-1);
        }
 
        if (!system_wide) {
-               open_counters(-1, target_pid != -1 ? target_pid : 0);
+               open_counters(-1, target_pid != -1 ? target_pid : getpid());
        } else for (i = 0; i < nr_cpus; i++)
                open_counters(i, target_pid);
 
@@ -377,22 +468,30 @@ static int __cmd_record(int argc, const char **argv)
                }
        }
 
-       /*
-        * TODO: store the current /proc/$/maps information somewhere
-        */
+       if (system_wide)
+               synthesize_samples();
 
        while (!done) {
-               int hits = events;
+               int hits = samples;
 
                for (i = 0; i < nr_cpu; i++) {
                        for (counter = 0; counter < nr_counters; counter++)
                                mmap_read(&mmap_array[i][counter]);
                }
 
-               if (hits == events)
+               if (hits == samples)
                        ret = poll(event_array, nr_poll, 100);
        }
 
+       /*
+        * Approximate RIP event size: 24 bytes.
+        */
+       fprintf(stderr,
+               "[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
+               (double)bytes_written / 1024.0 / 1024.0,
+               output_name,
+               bytes_written / 24);
+
        return 0;
 }
 
@@ -407,20 +506,26 @@ static char events_help_msg[EVENTS_HELP_MAX];
 static const struct option options[] = {
        OPT_CALLBACK('e', "event", NULL, "event",
                     events_help_msg, parse_events),
-       OPT_INTEGER('c', "count", &default_interval,
-                   "event period to sample"),
-       OPT_INTEGER('m', "mmap-pages", &mmap_pages,
-                   "number of mmap data pages"),
-       OPT_STRING('o', "output", &output_name, "file",
-                   "output file name"),
-       OPT_BOOLEAN('i', "inherit", &inherit,
-                   "child tasks inherit counters"),
        OPT_INTEGER('p', "pid", &target_pid,
                    "record events on existing pid"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                            "system-wide collection from all CPUs"),
+       OPT_BOOLEAN('A', "append", &append_file,
+                           "append to the output file to do incremental profiling"),
+       OPT_BOOLEAN('f', "force", &force,
+                       "overwrite existing data file"),
+       OPT_LONG('c', "count", &default_interval,
+                   "event period to sample"),
+       OPT_STRING('o', "output", &output_name, "file",
+                   "output file name"),
+       OPT_BOOLEAN('i', "inherit", &inherit,
+                   "child tasks inherit counters"),
+       OPT_INTEGER('F', "freq", &freq,
+                   "profile at this frequency"),
+       OPT_INTEGER('m', "mmap-pages", &mmap_pages,
+                   "number of mmap data pages"),
        OPT_END()
 };
 
@@ -439,6 +544,10 @@ int cmd_record(int argc, const char **argv, const char *prefix)
                event_id[0] = 0;
        }
 
+       if (freq) {
+               default_interval = freq;
+               freq = 1;
+       }
        for (counter = 0; counter < nr_counters; counter++) {
                if (event_count[counter])
                        continue;