/*
 * builtin-timechart.c - make an SVG timechart of system activity
 *
 * (C) Copyright 2009 Intel Corporation
 *
 * Authors:
 *     Arjan van de Ven <arjan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include "util/util.h"
#include "util/color.h"
#include <linux/list.h>
#include "util/cache.h"
#include <linux/rbtree.h>
#include "util/symbol.h"
#include "util/string.h"
#include "util/callchain.h"
#include "util/strlist.h"
#include "util/header.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/svghelper.h"
static char const *input_name = "perf.data";
static char const *output_name = "output.svg";

static unsigned long page_size;
static unsigned long mmap_window = 32;
static u64 sample_type;

static unsigned int numcpus;
static u64 min_freq;	/* Lowest CPU frequency seen */
static u64 max_freq;	/* Highest CPU frequency seen */
static u64 turbo_frequency;

static u64 first_time, last_time;

static struct perf_header *header;

struct sample_wrapper;
/*
 * Data structure layout:
 * We keep a list of "pid"s, matching the kernel's notion of a task struct.
 * Each "pid" entry has a list of "comm"s.
 *	This is because we want to track different programs separately, while
 *	exec will reuse the original pid (by design).
 * Each comm has a list of samples that will be used to draw
 * this comm on the timechart.
 */
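/*
 * Illustrative sketch of how these lists nest (a sketch, not from the
 * original source):
 *
 *	all_data -> per_pid -> per_pid -> ...
 *	               |
 *	               +-> all: per_pidcomm -> per_pidcomm -> ...
 *	                            |
 *	                            +-> samples: cpu_sample -> cpu_sample -> ...
 */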
struct per_pid {
	struct per_pid *next;

	int		pid;
	int		ppid;

	u64		start_time;
	u64		end_time;
	u64		total_time;
	int		display;

	struct per_pidcomm *all;
	struct per_pidcomm *current;

	int		Y;
};

struct per_pidcomm {
	struct per_pidcomm *next;

	char		*comm;

	u64		start_time;
	u64		end_time;
	u64		total_time;

	int		Y;
	int		display;

	long		state;
	u64		state_since;

	struct cpu_sample *samples;
};
struct sample_wrapper {
	struct sample_wrapper *next;

	u64		timestamp;
	unsigned char	data[0];
};
#define TYPE_NONE	0
#define TYPE_RUNNING	1
#define TYPE_WAITING	2
#define TYPE_BLOCKED	3

struct cpu_sample {
	struct cpu_sample *next;

	u64 start_time;
	u64 end_time;
	int type;
	int cpu;
};
static struct per_pid *all_data;

#define CSTATE 1
#define PSTATE 2

struct power_event {
	struct power_event *next;
	int type;
	u64 start_time;
	u64 end_time;
	u64 state;
	int cpu;
};

struct wake_event {
	struct wake_event *next;
	int waker;
	int wakee;
	u64 time;
};

static struct power_event *power_events;
static struct wake_event *wake_events;
static struct sample_wrapper *all_samples;
static struct per_pid *find_create_pid(int pid)
{
	struct per_pid *cursor = all_data;

	while (cursor) {
		if (cursor->pid == pid)
			return cursor;
		cursor = cursor->next;
	}
	cursor = malloc(sizeof(struct per_pid));
	assert(cursor != NULL);
	memset(cursor, 0, sizeof(struct per_pid));
	cursor->pid = pid;
	cursor->next = all_data;
	all_data = cursor;
	return cursor;
}
static void pid_set_comm(int pid, char *comm)
{
	struct per_pid *p;
	struct per_pidcomm *c;
	p = find_create_pid(pid);
	c = p->all;
	while (c) {
		if (c->comm && strcmp(c->comm, comm) == 0) {
			p->current = c;
			return;
		}
		if (!c->comm) {
			c->comm = strdup(comm);
			p->current = c;
			return;
		}
		c = c->next;
	}
	c = malloc(sizeof(struct per_pidcomm));
	assert(c != NULL);
	memset(c, 0, sizeof(struct per_pidcomm));
	c->comm = strdup(comm);
	p->current = c;
	c->next = p->all;
	p->all = c;
}
static void pid_fork(int pid, int ppid, u64 timestamp)
{
	struct per_pid *p, *pp;
	p = find_create_pid(pid);
	pp = find_create_pid(ppid);
	p->ppid = ppid;
	if (pp->current && pp->current->comm && !p->current)
		pid_set_comm(pid, pp->current->comm);

	p->start_time = timestamp;
	if (p->current) {
		p->current->start_time = timestamp;
		p->current->state_since = timestamp;
	}
}
static void pid_exit(int pid, u64 timestamp)
{
	struct per_pid *p;
	p = find_create_pid(pid);
	p->end_time = timestamp;
	if (p->current)
		p->current->end_time = timestamp;
}
static void
pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end)
{
	struct per_pid *p;
	struct per_pidcomm *c;
	struct cpu_sample *sample;

	p = find_create_pid(pid);
	c = p->current;
	if (!c) {
		c = malloc(sizeof(struct per_pidcomm));
		assert(c != NULL);
		memset(c, 0, sizeof(struct per_pidcomm));
		c->next = p->all;
		p->all = c;
		p->current = c;
	}

	sample = malloc(sizeof(struct cpu_sample));
	assert(sample != NULL);
	memset(sample, 0, sizeof(struct cpu_sample));
	sample->start_time = start;
	sample->end_time = end;
	sample->type = type;
	sample->cpu = cpu;
	sample->next = c->samples;
	c->samples = sample;

	if (sample->type == TYPE_RUNNING && end > start && start > 0) {
		c->total_time += (end-start);
		p->total_time += (end-start);
	}

	if (c->start_time == 0 || c->start_time > start)
		c->start_time = start;
	if (p->start_time == 0 || p->start_time > start)
		p->start_time = start;
}
#define MAX_CPUS 4096

static u64 cpus_cstate_start_times[MAX_CPUS];
static int cpus_cstate_state[MAX_CPUS];
static u64 cpus_pstate_start_times[MAX_CPUS];
static u64 cpus_pstate_state[MAX_CPUS];
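/*
 * Per-CPU bookkeeping: a C or P state only becomes a drawable interval
 * once both of its edges have been seen. Illustrative pairing (a sketch,
 * not from the original source):
 *
 *	power:power_start     -> c_state_start(cpu, t0, state)   remembers t0
 *	power:power_end       -> c_state_end(cpu, t1)            emits [t0, t1]
 *	power:power_frequency -> p_state_change(cpu, t, freq)    closes the old
 *	                                                         P state, opens
 *	                                                         a new one
 */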
static int
process_comm_event(event_t *event)
{
	pid_set_comm(event->comm.pid, event->comm.comm);
	return 0;
}

static int
process_fork_event(event_t *event)
{
	pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
	return 0;
}

static int
process_exit_event(event_t *event)
{
	pid_exit(event->fork.pid, event->fork.time);
	return 0;
}
struct trace_entry {
	u32			size;
	unsigned short		type;
	unsigned char		flags;
	unsigned char		preempt_count;
	int			pid;
	int			lock_depth;
};

struct power_entry {
	struct trace_entry te;
	s64	type;
	s64	value;
};

#define TASK_COMM_LEN 16
struct wakeup_entry {
	struct trace_entry te;
	char comm[TASK_COMM_LEN];
	int   pid;
	int   prio;
	int   success;
};
/*
 * trace_flag_type is an enumeration that holds different
 * states when a trace occurs. These are:
 *  IRQS_OFF       - interrupts were disabled
 *  IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
 *  NEED_RESCHED   - reschedule is requested
 *  HARDIRQ        - inside an interrupt handler
 *  SOFTIRQ        - inside a softirq handler
 */
enum trace_flag_type {
	TRACE_FLAG_IRQS_OFF		= 0x01,
	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
	TRACE_FLAG_NEED_RESCHED		= 0x04,
	TRACE_FLAG_HARDIRQ		= 0x08,
	TRACE_FLAG_SOFTIRQ		= 0x10,
};
struct sched_switch {
	struct trace_entry te;
	char prev_comm[TASK_COMM_LEN];
	int  prev_pid;
	int  prev_prio;
	long prev_state; /* Arjan weeps. */
	char next_comm[TASK_COMM_LEN];
	int  next_pid;
	int  next_prio;
};
static void c_state_start(int cpu, u64 timestamp, int state)
{
	cpus_cstate_start_times[cpu] = timestamp;
	cpus_cstate_state[cpu] = state;
}

static void c_state_end(int cpu, u64 timestamp)
{
	struct power_event *pwr;
	pwr = malloc(sizeof(struct power_event));
	if (!pwr)
		return;
	memset(pwr, 0, sizeof(struct power_event));

	pwr->state = cpus_cstate_state[cpu];
	pwr->start_time = cpus_cstate_start_times[cpu];
	pwr->end_time = timestamp;
	pwr->cpu = cpu;
	pwr->type = CSTATE;
	pwr->next = power_events;

	power_events = pwr;
}
static void p_state_change(int cpu, u64 timestamp, u64 new_freq)
{
	struct power_event *pwr;

	if (new_freq > 8000000)	/* frequencies are in kHz, so > 8 GHz is bogus data */
		return;

	pwr = malloc(sizeof(struct power_event));
	if (!pwr)
		return;
	memset(pwr, 0, sizeof(struct power_event));

	pwr->state = cpus_pstate_state[cpu];
	pwr->start_time = cpus_pstate_start_times[cpu];
	pwr->end_time = timestamp;
	pwr->cpu = cpu;
	pwr->type = PSTATE;
	pwr->next = power_events;

	if (!pwr->start_time)
		pwr->start_time = first_time;

	power_events = pwr;

	cpus_pstate_state[cpu] = new_freq;
	cpus_pstate_start_times[cpu] = timestamp;

	if ((u64)new_freq > max_freq)
		max_freq = new_freq;

	if (new_freq < min_freq || min_freq == 0)
		min_freq = new_freq;
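	/*
	 * Turbo heuristic (assumed cpufreq convention): the turbo state is
	 * advertised as the nominal maximum plus 1000 kHz, so seeing a
	 * frequency exactly 1000 kHz below the observed maximum implies
	 * that maximum is really a turbo frequency.
	 */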
	if (new_freq == max_freq - 1000)
		turbo_frequency = max_freq;
}
static void
sched_wakeup(int cpu, u64 timestamp, int pid, struct trace_entry *te)
{
	struct wake_event *we;
	struct per_pid *p;
	struct wakeup_entry *wake = (void *)te;

	we = malloc(sizeof(struct wake_event));
	if (!we)
		return;

	memset(we, 0, sizeof(struct wake_event));
	we->time = timestamp;
	we->waker = pid;

	if ((te->flags & TRACE_FLAG_HARDIRQ) || (te->flags & TRACE_FLAG_SOFTIRQ))
		we->waker = -1;

	we->wakee = wake->pid;
	we->next = wake_events;
	wake_events = we;

	p = find_create_pid(we->wakee);

	if (p && p->current && p->current->state == TYPE_NONE) {
		p->current->state_since = timestamp;
		p->current->state = TYPE_WAITING;
	}
	if (p && p->current && p->current->state == TYPE_BLOCKED) {
		pid_put_sample(p->pid, p->current->state, cpu, p->current->state_since, timestamp);
		p->current->state_since = timestamp;
		p->current->state = TYPE_WAITING;
	}
}
static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
{
	struct per_pid *p = NULL, *prev_p;
	struct sched_switch *sw = (void *)te;

	prev_p = find_create_pid(sw->prev_pid);

	p = find_create_pid(sw->next_pid);

	if (prev_p->current && prev_p->current->state != TYPE_NONE)
		pid_put_sample(sw->prev_pid, TYPE_RUNNING, cpu, prev_p->current->state_since, timestamp);
	if (p && p->current) {
		if (p->current->state != TYPE_NONE)
			pid_put_sample(sw->next_pid, p->current->state, cpu, p->current->state_since, timestamp);

		p->current->state_since = timestamp;
		p->current->state = TYPE_RUNNING;
	}

	if (prev_p->current) {
		prev_p->current->state = TYPE_NONE;
		prev_p->current->state_since = timestamp;
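		/*
		 * Decode prev_state (assumed scheduler task-state bits):
		 * 0 is TASK_RUNNING, i.e. the task was merely preempted and
		 * is still waiting for a CPU, while bit 1
		 * (TASK_UNINTERRUPTIBLE, value 2) means the task blocked,
		 * typically on I/O.
		 */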
		if (sw->prev_state & 2)
			prev_p->current->state = TYPE_BLOCKED;
		if (sw->prev_state == 0)
			prev_p->current->state = TYPE_WAITING;
	}
}
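/*
 * A perf sample record is a flat array of u64s; sample_type encodes which
 * fields are present, always in the fixed PERF_SAMPLE_* order. For example,
 * with sample_type = TIME | CPU | RAW the layout would be (illustrative):
 *
 *	array[0]    timestamp                  (PERF_SAMPLE_TIME)
 *	array[1]    cpu, in the low 32 bits    (PERF_SAMPLE_CPU)
 *	array[2..]  raw tracepoint payload     (PERF_SAMPLE_RAW)
 *
 * The parser below walks the array, bumping 'cursor' for each field the
 * record contains.
 */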
static int
process_sample_event(event_t *event)
{
	int cursor = 0;
	u64 addr = 0;
	u64 stamp = 0;
	u32 cpu = 0;
	u32 pid = 0;
	struct trace_entry *te;

	if (sample_type & PERF_SAMPLE_IP)
		cursor++;

	if (sample_type & PERF_SAMPLE_TID) {
		pid = event->sample.array[cursor]>>32;
		cursor++;
	}
	if (sample_type & PERF_SAMPLE_TIME) {
		stamp = event->sample.array[cursor++];

		if (!first_time || first_time > stamp)
			first_time = stamp;
		if (last_time < stamp)
			last_time = stamp;
	}
	if (sample_type & PERF_SAMPLE_ADDR)
		addr = event->sample.array[cursor++];
	if (sample_type & PERF_SAMPLE_ID)
		cursor++;
	if (sample_type & PERF_SAMPLE_STREAM_ID)
		cursor++;
	if (sample_type & PERF_SAMPLE_CPU)
		cpu = event->sample.array[cursor++] & 0xFFFFFFFF;
	if (sample_type & PERF_SAMPLE_PERIOD)
		cursor++;

	te = (void *)&event->sample.array[cursor];
	if (sample_type & PERF_SAMPLE_RAW && te->size > 0) {
		char *event_str;
		struct power_entry *pe;

		pe = (void *)te;

		event_str = perf_header__find_event(te->type);

		if (!event_str)
			return 0;

		if (strcmp(event_str, "power:power_start") == 0)
			c_state_start(cpu, stamp, pe->value);

		if (strcmp(event_str, "power:power_end") == 0)
			c_state_end(cpu, stamp);

		if (strcmp(event_str, "power:power_frequency") == 0)
			p_state_change(cpu, stamp, pe->value);

		if (strcmp(event_str, "sched:sched_wakeup") == 0)
			sched_wakeup(cpu, stamp, pid, te);

		if (strcmp(event_str, "sched:sched_switch") == 0)
			sched_switch(cpu, stamp, te);
	}
	return 0;
}
/*
 * After the last sample we need to wrap up the current C/P state
 * and close it out for each CPU.
 */
static void end_sample_processing(void)
{
	unsigned int cpu;
	struct power_event *pwr;

	for (cpu = 0; cpu < numcpus; cpu++) {
		/* C state */
		pwr = malloc(sizeof(struct power_event));
		if (!pwr)
			return;
		memset(pwr, 0, sizeof(struct power_event));

		pwr->state = cpus_cstate_state[cpu];
		pwr->start_time = cpus_cstate_start_times[cpu];
		pwr->end_time = last_time;
		pwr->cpu = cpu;
		pwr->type = CSTATE;
		pwr->next = power_events;

		power_events = pwr;

		/* P state */
		pwr = malloc(sizeof(struct power_event));
		if (!pwr)
			return;
		memset(pwr, 0, sizeof(struct power_event));

		pwr->state = cpus_pstate_state[cpu];
		pwr->start_time = cpus_pstate_start_times[cpu];
		pwr->end_time = last_time;
		pwr->cpu = cpu;
		pwr->type = PSTATE;
		pwr->next = power_events;

		if (!pwr->start_time)
			pwr->start_time = first_time;
		if (!pwr->state)
			pwr->state = min_freq;

		power_events = pwr;
	}
}
static u64 sample_time(event_t *event)
{
	int cursor;

	cursor = 0;
	if (sample_type & PERF_SAMPLE_IP)
		cursor++;
	if (sample_type & PERF_SAMPLE_TID)
		cursor++;
	if (sample_type & PERF_SAMPLE_TIME)
		return event->sample.array[cursor];
	return 0;
}
/*
 * We first queue all events, sorted backwards by insertion.
 * The order will get flipped later.
 */
static int
queue_sample_event(event_t *event)
{
	struct sample_wrapper *copy, *prev;
	int size;

	size = event->sample.header.size + sizeof(struct sample_wrapper) + 8;

	copy = malloc(size);
	if (!copy)
		return 1;

	memset(copy, 0, size);

	copy->timestamp = sample_time(event);

	memcpy(&copy->data, event, event->sample.header.size);

	/* insert in the right place in the list */

	if (!all_samples) {
		/* first sample ever */
		all_samples = copy;
		return 0;
	}

	if (all_samples->timestamp < copy->timestamp) {
		/* insert at the head of the list */
		copy->next = all_samples;
		all_samples = copy;
		return 0;
	}

	prev = all_samples;
	while (prev->next) {
		if (prev->next->timestamp < copy->timestamp) {
			copy->next = prev->next;
			prev->next = copy;
			return 0;
		}
		prev = prev->next;
	}

	/* insert at the end of the list */
	prev->next = copy;
	return 0;
}
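/*
 * The pending list above is kept sorted newest-first, so simply reversing
 * it in place, as done below, yields chronological order without needing
 * a full sort pass.
 */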
static void sort_queued_samples(void)
{
	struct sample_wrapper *cursor, *next;

	cursor = all_samples;
	all_samples = NULL;

	while (cursor) {
		next = cursor->next;
		cursor->next = all_samples;
		all_samples = cursor;
		cursor = next;
	}
}
/*
 * Sort the pid data structure
 */
static void sort_pids(void)
{
	struct per_pid *new_list, *p, *cursor, *prev;
	/* sort by ppid first, then by pid, lowest to highest */

	new_list = NULL;

	while (all_data) {
		p = all_data;
		all_data = p->next;
		p->next = NULL;

		if (new_list == NULL) {
			new_list = p;
			continue;
		}

		prev = NULL;
		cursor = new_list;
		while (cursor) {
			if (cursor->ppid > p->ppid ||
			    (cursor->ppid == p->ppid && cursor->pid > p->pid)) {
				/* must insert before */
				if (prev) {
					p->next = prev->next;
					prev->next = p;
				} else {
					p->next = new_list;
					new_list = p;
				}
				break;
			}
			prev = cursor;
			cursor = cursor->next;
		}
		if (!cursor) {
			/* no insertion point found: append at the tail */
			prev->next = p;
		}
	}

	all_data = new_list;
}
static void draw_c_p_states(void)
{
	struct power_event *pwr;
	pwr = power_events;

	/*
	 * Two-pass drawing, so that the P-state bars are on top of
	 * the C-state blocks.
	 */
	while (pwr) {
		if (pwr->type == CSTATE)
			svg_cstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state);
		pwr = pwr->next;
	}

	pwr = power_events;
	while (pwr) {
		if (pwr->type == PSTATE) {
			if (!pwr->state)
				pwr->state = min_freq;
			svg_pstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state);
		}
		pwr = pwr->next;
	}
}
static void draw_wakeups(void)
{
	struct wake_event *we;
	struct per_pid *p;
	struct per_pidcomm *c;

	we = wake_events;
	while (we) {
		int from = 0, to = 0;
		char *task_from = NULL, *task_to = NULL;

		/* locate the column of the waker and wakee */
		p = all_data;
		while (p) {
			if (p->pid == we->waker || p->pid == we->wakee) {
				c = p->all;
				while (c) {
					if (c->Y && c->start_time <= we->time && c->end_time >= we->time) {
						if (p->pid == we->waker) {
							from = c->Y;
							task_from = c->comm;
						}
						if (p->pid == we->wakee) {
							to = c->Y;
							task_to = c->comm;
						}
					}
					c = c->next;
				}
			}
			p = p->next;
		}

		if (we->waker == -1)
			svg_interrupt(we->time, to);
		else if (from && to && abs(from - to) == 1)
			svg_wakeline(we->time, from, to);
		else
			svg_partial_wakeline(we->time, from, task_from, to, task_to);
		we = we->next;
	}
}
static void draw_cpu_usage(void)
{
	struct per_pid *p;
	struct per_pidcomm *c;
	struct cpu_sample *sample;
	p = all_data;
	while (p) {
		c = p->all;
		while (c) {
			sample = c->samples;
			while (sample) {
				if (sample->type == TYPE_RUNNING)
					svg_process(sample->cpu, sample->start_time, sample->end_time, "sample", c->comm);

				sample = sample->next;
			}
			c = c->next;
		}
		p = p->next;
	}
}
static void draw_process_bars(void)
{
	struct per_pid *p;
	struct per_pidcomm *c;
	struct cpu_sample *sample;
	int Y = 0;

	Y = 2 * numcpus + 2;

	p = all_data;
	while (p) {
		c = p->all;
		while (c) {
			if (!c->display) {
				c->Y = 0;
				c = c->next;
				continue;
			}

			svg_box(Y, p->start_time, p->end_time, "process");
			sample = c->samples;
			while (sample) {
				if (sample->type == TYPE_RUNNING)
					svg_sample(Y, sample->cpu, sample->start_time, sample->end_time, "sample");
				if (sample->type == TYPE_BLOCKED)
					svg_box(Y, sample->start_time, sample->end_time, "blocked");
				if (sample->type == TYPE_WAITING)
					svg_box(Y, sample->start_time, sample->end_time, "waiting");
				sample = sample->next;
			}

			if (c->comm) {
				char comm[256];
				if (c->total_time > 5000000000) /* 5 seconds */
					sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / 1000000000.0);
				else
					sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / 1000000.0);

				svg_text(Y, c->start_time, comm);
			}
			c->Y = Y;
			Y++;
			c = c->next;
		}
		p = p->next;
	}
}
static int determine_display_tasks(u64 threshold)
{
	struct per_pid *p;
	struct per_pidcomm *c;
	int count = 0;

	p = all_data;
	while (p) {
		p->display = 0;
		if (p->start_time == 1)
			p->start_time = first_time;

		/* no exit marker, task kept running to the end */
		if (p->end_time == 0)
			p->end_time = last_time;
		if (p->total_time >= threshold)
			p->display = 1;

		c = p->all;
		while (c) {
			c->display = 0;

			if (c->start_time == 1)
				c->start_time = first_time;

			if (c->total_time >= threshold) {
				c->display = 1;
				count++;
			}

			if (c->end_time == 0)
				c->end_time = last_time;

			c = c->next;
		}
		p = p->next;
	}
	return count;
}
#define TIME_THRESH 10000000	/* 10 msec of CPU time, in nanoseconds */

static void write_svg_file(const char *filename)
{
	u64 i;
	int count;

	count = determine_display_tasks(TIME_THRESH);

	/* We'd like to show at least 15 tasks; be less picky if we have fewer */
	if (count < 15)
		count = determine_display_tasks(TIME_THRESH / 10);

	open_svg(filename, numcpus, count);

	svg_time_grid(first_time, last_time);

	for (i = 0; i < numcpus; i++)
		svg_cpu_box(i, max_freq, turbo_frequency);

	draw_cpu_usage();
	draw_process_bars();
	draw_c_p_states();
	draw_wakeups();

	svg_close();
}
static int
process_event(event_t *event)
{
	switch (event->header.type) {

	case PERF_EVENT_COMM:
		return process_comm_event(event);
	case PERF_EVENT_FORK:
		return process_fork_event(event);
	case PERF_EVENT_EXIT:
		return process_exit_event(event);
	case PERF_EVENT_SAMPLE:
		return queue_sample_event(event);

	/*
	 * We don't process these right now, but they are fine:
	 */
	case PERF_EVENT_MMAP:
	case PERF_EVENT_THROTTLE:
	case PERF_EVENT_UNTHROTTLE:
		return 0;

	default:
		return -1;
	}
}
static void process_samples(void)
{
	struct sample_wrapper *cursor;
	event_t *event;

	sort_queued_samples();

	cursor = all_samples;
	while (cursor) {
		event = (void *)&cursor->data;
		cursor = cursor->next;
		process_sample_event(event);
	}
}
static int __cmd_timechart(void)
{
	int ret, rc = EXIT_FAILURE;
	unsigned long offset = 0;
	unsigned long head, shift;
	struct stat statbuf;
	event_t *event;
	uint32_t size;
	char *buf;
	int input;

	input = open(input_name, O_RDONLY);
	if (input < 0) {
		fprintf(stderr, " failed to open file: %s", input_name);
		if (!strcmp(input_name, "perf.data"))
			fprintf(stderr, " (try 'perf record' first)");
		fprintf(stderr, "\n");
		exit(-1);
	}

	ret = fstat(input, &statbuf);
	if (ret < 0) {
		perror("failed to stat file");
		exit(-1);
	}

	if (!statbuf.st_size) {
		fprintf(stderr, "zero-sized file, nothing to do!\n");
		exit(0);
	}

	header = perf_header__read(input);
	head = header->data_offset;

	sample_type = perf_header__sample_type(header);

	shift = page_size * (head / page_size);
	offset += shift;
	head -= shift;

remap:
	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
			   MAP_SHARED, input, offset);
	if (buf == MAP_FAILED) {
		perror("failed to mmap file");
		exit(-1);
	}

more:
	event = (event_t *)(buf + head);

	size = event->header.size;
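	/*
	 * If this event would run past the end of the current mmap window,
	 * slide the window forward: unmap, round the file offset down to
	 * the page containing 'head', and remap from there.
	 */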
	if (head + event->header.size >= page_size * mmap_window) {
		int ret2;

		shift = page_size * (head / page_size);

		ret2 = munmap(buf, page_size * mmap_window);
		assert(ret2 == 0);

		offset += shift;
		head -= shift;
		goto remap;
	}

	size = event->header.size;

	if (!size || process_event(event) < 0) {

		printf("%p [%p]: skipping unknown header type: %d\n",
			(void *)(offset + head),
			(void *)(long)(event->header.size),
			event->header.type);

		/*
		 * Assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */

		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (offset + head >= header->data_offset + header->data_size)
		goto done;

	if (offset + head < (unsigned long)statbuf.st_size)
		goto more;

done:
	rc = EXIT_SUCCESS;
	close(input);

	process_samples();

	end_sample_processing();

	sort_pids();

	write_svg_file(output_name);

	printf("Written %2.1f seconds of trace to %s.\n",
		(last_time - first_time) / 1000000000.0, output_name);

	return rc;
}
static const char * const timechart_usage[] = {
	"perf timechart [<options>] {record}",
	NULL
};

static const char *record_args[] = {
	"record",

	"-e", "power:power_start",
	"-e", "power:power_end",
	"-e", "power:power_frequency",
	"-e", "sched:sched_wakeup",
	"-e", "sched:sched_switch",
};
static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
static const struct option options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_END()
};
int cmd_timechart(int argc, const char **argv, const char *prefix __used)
{
	page_size = getpagesize();

	argc = parse_options(argc, argv, options, timechart_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (argc && !strncmp(argv[0], "rec", 3))
		return __cmd_record(argc, argv);
	else if (argc)
		usage_with_options(timechart_usage, options);

	return __cmd_timechart();
}