/* tools/perf/util/session.c */
#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "evlist.h"
#include "evsel.h"
#include "session.h"
#include "tool.h"
#include "sort.h"
#include "util.h"
#include "cpumap.h"

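/*
 * Open the session's input: "-" means a pipe on stdin, anything else is
 * a perf.data style file that must be owned by the current user (or by
 * root), be non-empty and carry a readable header with a consistent
 * sample_type and sample_id_all across all events.
 */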
static int perf_session__open(struct perf_session *self, bool force)
{
        struct stat input_stat;

        if (!strcmp(self->filename, "-")) {
                self->fd_pipe = true;
                self->fd = STDIN_FILENO;

                if (perf_session__read_header(self, self->fd) < 0)
                        pr_err("incompatible file format");

                return 0;
        }

        self->fd = open(self->filename, O_RDONLY);
        if (self->fd < 0) {
                int err = errno;

                pr_err("failed to open %s: %s", self->filename, strerror(err));
                if (err == ENOENT && !strcmp(self->filename, "perf.data"))
                        pr_err("  (try 'perf record' first)");
                pr_err("\n");
                return -err; /* use the saved errno, pr_err() may clobber errno */
        }

        if (fstat(self->fd, &input_stat) < 0)
                goto out_close;

        if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
                pr_err("file %s not owned by current user or root\n",
                       self->filename);
                goto out_close;
        }

        if (!input_stat.st_size) {
                pr_info("zero-sized file (%s), nothing to do!\n",
                        self->filename);
                goto out_close;
        }

        if (perf_session__read_header(self, self->fd) < 0) {
                pr_err("incompatible file format");
                goto out_close;
        }

        if (!perf_evlist__valid_sample_type(self->evlist)) {
                pr_err("non matching sample_type");
                goto out_close;
        }

        if (!perf_evlist__valid_sample_id_all(self->evlist)) {
                pr_err("non matching sample_id_all");
                goto out_close;
        }

        self->size = input_stat.st_size;
        return 0;

out_close:
        close(self->fd);
        self->fd = -1;
        return -1;
}

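/*
 * Cache the evlist-wide sample layout in the session so that the event
 * parsing fast paths don't have to recompute it for every event.
 */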
void perf_session__update_sample_type(struct perf_session *self)
{
        self->sample_type = perf_evlist__sample_type(self->evlist);
        self->sample_size = __perf_evsel__sample_size(self->sample_type);
        self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
        self->id_hdr_size = perf_evlist__id_hdr_size(self->evlist);
        self->host_machine.id_hdr_size = self->id_hdr_size;
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
        int ret = machine__create_kernel_maps(&self->host_machine);

        if (ret >= 0)
                ret = machines__create_guest_kernel_maps(&self->machines);
        return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
        machine__destroy_kernel_maps(&self->host_machine);
        machines__destroy_guest_kernel_maps(&self->machines);
}

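/*
 * Allocate and initialize a session. With no filename, stdin is used if
 * it is a pipe, otherwise "perf.data". O_RDONLY opens and validates the
 * input, O_WRONLY sets up kernel maps for recording.
 */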
struct perf_session *perf_session__new(const char *filename, int mode,
                                       bool force, bool repipe,
                                       struct perf_tool *tool)
{
        struct perf_session *self;
        struct stat st;
        size_t len;

        if (!filename || !strlen(filename)) {
                if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
                        filename = "-";
                else
                        filename = "perf.data";
        }

        len = strlen(filename);
        self = zalloc(sizeof(*self) + len);

        if (self == NULL)
                goto out;

        memcpy(self->filename, filename, len);
        /*
         * On 64bit we can mmap the data file in one go. No need for tiny mmap
         * slices. On 32bit we use 32MB.
         */
#if BITS_PER_LONG == 64
        self->mmap_window = ULLONG_MAX;
#else
        self->mmap_window = 32 * 1024 * 1024ULL;
#endif
        self->machines = RB_ROOT;
        self->repipe = repipe;
        INIT_LIST_HEAD(&self->ordered_samples.samples);
        INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
        INIT_LIST_HEAD(&self->ordered_samples.to_free);
        machine__init(&self->host_machine, "", HOST_KERNEL_ID);

        if (mode == O_RDONLY) {
                if (perf_session__open(self, force) < 0)
                        goto out_delete;
                perf_session__update_sample_type(self);
        } else if (mode == O_WRONLY) {
                /*
                 * In O_RDONLY mode this will be performed when reading the
                 * kernel MMAP event, in perf_event__process_mmap().
                 */
                if (perf_session__create_kernel_maps(self) < 0)
                        goto out_delete;
        }

        if (tool && tool->ordering_requires_timestamps &&
            tool->ordered_samples && !self->sample_id_all) {
                dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
                tool->ordered_samples = false;
        }

out:
        return self;
out_delete:
        perf_session__delete(self);
        return NULL;
}

static void machine__delete_dead_threads(struct machine *machine)
{
        struct thread *n, *t;

        list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
                list_del(&t->node);
                thread__delete(t);
        }
}

static void perf_session__delete_dead_threads(struct perf_session *session)
{
        machine__delete_dead_threads(&session->host_machine);
}

static void machine__delete_threads(struct machine *self)
{
        struct rb_node *nd = rb_first(&self->threads);

        while (nd) {
                struct thread *t = rb_entry(nd, struct thread, rb_node);

                /*
                 * Advance to the next node before erasing this one:
                 * rb_next() must not be called on a node that has
                 * already been removed from the tree.
                 */
                nd = rb_next(nd);
                rb_erase(&t->rb_node, &self->threads);
                thread__delete(t);
        }
}

static void perf_session__delete_threads(struct perf_session *session)
{
        machine__delete_threads(&session->host_machine);
}

void perf_session__delete(struct perf_session *self)
{
        perf_session__destroy_kernel_maps(self);
        perf_session__delete_dead_threads(self);
        perf_session__delete_threads(self);
        machine__exit(&self->host_machine);
        close(self->fd);
        free(self);
}

void machine__remove_thread(struct machine *self, struct thread *th)
{
        self->last_match = NULL;
        rb_erase(&th->rb_node, &self->threads);
        /*
         * We may have references to this thread, for instance in some hist_entry
         * instances, so just move them to a separate list.
         */
        list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
        if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
                return true;

        return false;
}

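/*
 * Walk the raw callchain in the order given by callchain_param.order,
 * switching cpumode whenever a PERF_CONTEXT_* marker is seen, resolving
 * each ip to a map/symbol and appending it to the evsel's callchain
 * cursor.
 */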
int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
                               struct thread *thread,
                               struct ip_callchain *chain,
                               struct symbol **parent)
{
        u8 cpumode = PERF_RECORD_MISC_USER;
        unsigned int i;
        int err;

        callchain_cursor_reset(&evsel->hists.callchain_cursor);

        for (i = 0; i < chain->nr; i++) {
                u64 ip;
                struct addr_location al;

                if (callchain_param.order == ORDER_CALLEE)
                        ip = chain->ips[i];
                else
                        ip = chain->ips[chain->nr - i - 1];

                if (ip >= PERF_CONTEXT_MAX) {
                        switch (ip) {
                        case PERF_CONTEXT_HV:
                                cpumode = PERF_RECORD_MISC_HYPERVISOR;  break;
                        case PERF_CONTEXT_KERNEL:
                                cpumode = PERF_RECORD_MISC_KERNEL;      break;
                        case PERF_CONTEXT_USER:
                                cpumode = PERF_RECORD_MISC_USER;        break;
                        default:
                                break;
                        }
                        continue;
                }

                al.filtered = false;
                thread__find_addr_location(thread, self, cpumode,
                                           MAP__FUNCTION, ip, &al, NULL);
                if (al.sym != NULL) {
                        if (sort__has_parent && !*parent &&
                            symbol__match_parent_regex(al.sym))
                                *parent = al.sym;
                        if (!symbol_conf.use_callchain)
                                break;
                }

                err = callchain_cursor_append(&evsel->hists.callchain_cursor,
                                              ip, al.map, al.sym);
                if (err)
                        return err;
        }

        return 0;
}

static int process_event_synth_tracing_data_stub(union perf_event *event __used,
                                                 struct perf_session *session __used)
{
        dump_printf(": unhandled!\n");
        return 0;
}

static int process_event_synth_attr_stub(union perf_event *event __used,
                                         struct perf_evlist **pevlist __used)
{
        dump_printf(": unhandled!\n");
        return 0;
}

static int process_event_sample_stub(struct perf_tool *tool __used,
                                     union perf_event *event __used,
                                     struct perf_sample *sample __used,
                                     struct perf_evsel *evsel __used,
                                     struct machine *machine __used)
{
        dump_printf(": unhandled!\n");
        return 0;
}

static int process_event_stub(struct perf_tool *tool __used,
                              union perf_event *event __used,
                              struct perf_sample *sample __used,
                              struct machine *machine __used)
{
        dump_printf(": unhandled!\n");
        return 0;
}

static int process_finished_round_stub(struct perf_tool *tool __used,
                                       union perf_event *event __used,
                                       struct perf_session *perf_session __used)
{
        dump_printf(": unhandled!\n");
        return 0;
}

static int process_event_type_stub(struct perf_tool *tool __used,
                                   union perf_event *event __used)
{
        dump_printf(": unhandled!\n");
        return 0;
}

static int process_finished_round(struct perf_tool *tool,
                                  union perf_event *event,
                                  struct perf_session *session);

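/*
 * Point every callback the tool didn't set at a suitable stub, so the
 * event dispatch code never has to check for NULL handlers.
 */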
static void perf_tool__fill_defaults(struct perf_tool *tool)
{
        if (tool->sample == NULL)
                tool->sample = process_event_sample_stub;
        if (tool->mmap == NULL)
                tool->mmap = process_event_stub;
        if (tool->comm == NULL)
                tool->comm = process_event_stub;
        if (tool->fork == NULL)
                tool->fork = process_event_stub;
        if (tool->exit == NULL)
                tool->exit = process_event_stub;
        if (tool->lost == NULL)
                tool->lost = perf_event__process_lost;
        if (tool->read == NULL)
                tool->read = process_event_sample_stub;
        if (tool->throttle == NULL)
                tool->throttle = process_event_stub;
        if (tool->unthrottle == NULL)
                tool->unthrottle = process_event_stub;
        if (tool->attr == NULL)
                tool->attr = process_event_synth_attr_stub;
        if (tool->event_type == NULL)
                tool->event_type = process_event_type_stub;
        if (tool->tracing_data == NULL)
                tool->tracing_data = process_event_synth_tracing_data_stub;
        if (tool->build_id == NULL)
                tool->build_id = process_finished_round_stub;
        if (tool->finished_round == NULL) {
                if (tool->ordered_samples)
                        tool->finished_round = process_finished_round;
                else
                        tool->finished_round = process_finished_round_stub;
        }
}

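/*
 * Byte swap a chunk of memory in place as an array of u64s, byte_size
 * is expected to be a multiple of sizeof(u64).
 */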
void mem_bswap_64(void *src, int byte_size)
{
        u64 *m = src;

        while (byte_size > 0) {
                *m = bswap_64(*m);
                byte_size -= sizeof(u64);
                ++m;
        }
}

static void perf_event__all64_swap(union perf_event *event)
{
        struct perf_event_header *hdr = &event->header;
        mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event)
{
        event->comm.pid = bswap_32(event->comm.pid);
        event->comm.tid = bswap_32(event->comm.tid);
}

static void perf_event__mmap_swap(union perf_event *event)
{
        event->mmap.pid   = bswap_32(event->mmap.pid);
        event->mmap.tid   = bswap_32(event->mmap.tid);
        event->mmap.start = bswap_64(event->mmap.start);
        event->mmap.len   = bswap_64(event->mmap.len);
        event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}

static void perf_event__task_swap(union perf_event *event)
{
        event->fork.pid  = bswap_32(event->fork.pid);
        event->fork.tid  = bswap_32(event->fork.tid);
        event->fork.ppid = bswap_32(event->fork.ppid);
        event->fork.ptid = bswap_32(event->fork.ptid);
        event->fork.time = bswap_64(event->fork.time);
}

static void perf_event__read_swap(union perf_event *event)
{
        event->read.pid          = bswap_32(event->read.pid);
        event->read.tid          = bswap_32(event->read.tid);
        event->read.value        = bswap_64(event->read.value);
        event->read.time_enabled = bswap_64(event->read.time_enabled);
        event->read.time_running = bswap_64(event->read.time_running);
        event->read.id           = bswap_64(event->read.id);
}

/* exported for swapping attributes in file header */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
        attr->type              = bswap_32(attr->type);
        attr->size              = bswap_32(attr->size);
        attr->config            = bswap_64(attr->config);
        attr->sample_period     = bswap_64(attr->sample_period);
        attr->sample_type       = bswap_64(attr->sample_type);
        attr->read_format       = bswap_64(attr->read_format);
        attr->wakeup_events     = bswap_32(attr->wakeup_events);
        attr->bp_type           = bswap_32(attr->bp_type);
        attr->bp_addr           = bswap_64(attr->bp_addr);
        attr->bp_len            = bswap_64(attr->bp_len);
}

static void perf_event__hdr_attr_swap(union perf_event *event)
{
        size_t size;

        perf_event__attr_swap(&event->attr.attr);

        size = event->header.size;
        size -= (void *)&event->attr.id - (void *)event;
        mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_type_swap(union perf_event *event)
{
        event->event_type.event_type.event_id =
                bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event)
{
        event->tracing_data.size = bswap_32(event->tracing_data.size);
}

typedef void (*perf_event__swap_op)(union perf_event *event);

static perf_event__swap_op perf_event__swap_ops[] = {
        [PERF_RECORD_MMAP]                = perf_event__mmap_swap,
        [PERF_RECORD_COMM]                = perf_event__comm_swap,
        [PERF_RECORD_FORK]                = perf_event__task_swap,
        [PERF_RECORD_EXIT]                = perf_event__task_swap,
        [PERF_RECORD_LOST]                = perf_event__all64_swap,
        [PERF_RECORD_READ]                = perf_event__read_swap,
        [PERF_RECORD_SAMPLE]              = perf_event__all64_swap,
        [PERF_RECORD_HEADER_ATTR]         = perf_event__hdr_attr_swap,
        [PERF_RECORD_HEADER_EVENT_TYPE]   = perf_event__event_type_swap,
        [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
        [PERF_RECORD_HEADER_BUILD_ID]     = NULL,
        [PERF_RECORD_HEADER_MAX]          = NULL,
};

struct sample_queue {
        u64                     timestamp;
        u64                     file_offset;
        union perf_event        *event;
        struct list_head        list;
};

static void perf_session_free_sample_buffers(struct perf_session *session)
{
        struct ordered_samples *os = &session->ordered_samples;

        while (!list_empty(&os->to_free)) {
                struct sample_queue *sq;

                sq = list_entry(os->to_free.next, struct sample_queue, list);
                list_del(&sq->list);
                free(sq);
        }
}

static int perf_session_deliver_event(struct perf_session *session,
                                      union perf_event *event,
                                      struct perf_sample *sample,
                                      struct perf_tool *tool,
                                      u64 file_offset);

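/*
 * Deliver, in timestamp order, every queued event with a timestamp up
 * to os->next_flush, recycling the queue entries onto the sample_cache
 * list and updating the progress bar along the way.
 */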
static void flush_sample_queue(struct perf_session *s,
                               struct perf_tool *tool)
{
        struct ordered_samples *os = &s->ordered_samples;
        struct list_head *head = &os->samples;
        struct sample_queue *tmp, *iter;
        struct perf_sample sample;
        u64 limit = os->next_flush;
        u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
        unsigned idx = 0, progress_next = os->nr_samples / 16;
        int ret;

        if (!tool->ordered_samples || !limit)
                return;

        list_for_each_entry_safe(iter, tmp, head, list) {
                if (iter->timestamp > limit)
                        break;

                ret = perf_session__parse_sample(s, iter->event, &sample);
                if (ret)
                        pr_err("Can't parse sample, err = %d\n", ret);
                else
                        perf_session_deliver_event(s, iter->event, &sample, tool,
                                                   iter->file_offset);

                os->last_flush = iter->timestamp;
                list_del(&iter->list);
                list_add(&iter->list, &os->sample_cache);
                if (++idx >= progress_next) {
                        progress_next += os->nr_samples / 16;
                        ui_progress__update(idx, os->nr_samples,
                                            "Processing time ordered events...");
                }
        }

        if (list_empty(head)) {
                os->last_sample = NULL;
        } else if (last_ts <= limit) {
                os->last_sample =
                        list_entry(head->prev, struct sample_queue, list);
        }

        os->nr_samples = 0;
}

/*
 * When perf record finishes a pass over all buffers, it records this pseudo
 * event.
 * We record the max timestamp t found in the pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in the pass n + 1.
 * Hence when we start to read the pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(struct perf_tool *tool,
                                  union perf_event *event __used,
                                  struct perf_session *session)
{
        flush_sample_queue(session, tool);
        session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

        return 0;
}

/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
        struct ordered_samples *os = &s->ordered_samples;
        struct sample_queue *sample = os->last_sample;
        u64 timestamp = new->timestamp;
        struct list_head *p;

        ++os->nr_samples;
        os->last_sample = new;

        if (!sample) {
                list_add(&new->list, &os->samples);
                os->max_timestamp = timestamp;
                return;
        }

        /*
         * last_sample might point to some random place in the list as it's
         * the last queued event. We expect that the new event is close to
         * this.
         */
        if (sample->timestamp <= timestamp) {
                while (sample->timestamp <= timestamp) {
                        p = sample->list.next;
                        if (p == &os->samples) {
                                list_add_tail(&new->list, &os->samples);
                                os->max_timestamp = timestamp;
                                return;
                        }
                        sample = list_entry(p, struct sample_queue, list);
                }
                list_add_tail(&new->list, &sample->list);
        } else {
                while (sample->timestamp > timestamp) {
                        p = sample->list.prev;
                        if (p == &os->samples) {
                                list_add(&new->list, &os->samples);
                                return;
                        }
                        sample = list_entry(p, struct sample_queue, list);
                }
                list_add(&new->list, &sample->list);
        }
}

#define MAX_SAMPLE_BUFFER       (64 * 1024 / sizeof(struct sample_queue))

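/*
 * Get a free queue entry: first from the recycled ones on sample_cache,
 * then from the current allocation batch, and only when both are empty
 * malloc() a new batch of MAX_SAMPLE_BUFFER entries. Entry 0 of each
 * batch is reserved to link the batch on the to_free list, so handing
 * out entries starts at index 1.
 */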
static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
                                    struct perf_sample *sample, u64 file_offset)
{
        struct ordered_samples *os = &s->ordered_samples;
        struct list_head *sc = &os->sample_cache;
        u64 timestamp = sample->time;
        struct sample_queue *new;

        if (!timestamp || timestamp == ~0ULL)
                return -ETIME;

        if (timestamp < s->ordered_samples.last_flush) {
                printf("Warning: Timestamp below last timeslice flush\n");
                return -EINVAL;
        }

        if (!list_empty(sc)) {
                new = list_entry(sc->next, struct sample_queue, list);
                list_del(&new->list);
        } else if (os->sample_buffer) {
                new = os->sample_buffer + os->sample_buffer_idx;
                if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
                        os->sample_buffer = NULL;
        } else {
                os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
                if (!os->sample_buffer)
                        return -ENOMEM;
                list_add(&os->sample_buffer->list, &os->to_free);
                os->sample_buffer_idx = 2;
                new = os->sample_buffer + 1;
        }

        new->timestamp = timestamp;
        new->file_offset = file_offset;
        new->event = event;

        __queue_event(new, s);

        return 0;
}

static void callchain__printf(struct perf_sample *sample)
{
        unsigned int i;

        printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);

        for (i = 0; i < sample->callchain->nr; i++)
                printf("..... %2d: %016" PRIx64 "\n",
                       i, sample->callchain->ips[i]);
}

static void perf_session__print_tstamp(struct perf_session *session,
                                       union perf_event *event,
                                       struct perf_sample *sample)
{
        if (event->header.type != PERF_RECORD_SAMPLE &&
            !session->sample_id_all) {
                fputs("-1 -1 ", stdout);
                return;
        }

        if ((session->sample_type & PERF_SAMPLE_CPU))
                printf("%u ", sample->cpu);

        if (session->sample_type & PERF_SAMPLE_TIME)
                printf("%" PRIu64 " ", sample->time);
}

static void dump_event(struct perf_session *session, union perf_event *event,
                       u64 file_offset, struct perf_sample *sample)
{
        if (!dump_trace)
                return;

        printf("\n%#" PRIx64 " [%#x]: event: %d\n",
               file_offset, event->header.size, event->header.type);

        trace_event(event);

        if (sample)
                perf_session__print_tstamp(session, event, sample);

        printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
               event->header.size, perf_event__name(event->header.type));
}

static void dump_sample(struct perf_session *session, union perf_event *event,
                        struct perf_sample *sample)
{
        if (!dump_trace)
                return;

        printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
               event->header.misc, sample->pid, sample->tid, sample->ip,
               sample->period, sample->addr);

        if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
                callchain__printf(sample);
}

static struct machine *
        perf_session__find_machine_for_cpumode(struct perf_session *session,
                                               union perf_event *event)
{
        const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

        if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest)
                return perf_session__find_machine(session, event->ip.pid);

        return perf_session__find_host_machine(session);
}

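/*
 * Dispatch a single kernel event to the matching tool callback, after
 * looking up its evsel (for per-evsel stats) and the machine (host or
 * guest) it originated from.
 */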
static int perf_session_deliver_event(struct perf_session *session,
                                      union perf_event *event,
                                      struct perf_sample *sample,
                                      struct perf_tool *tool,
                                      u64 file_offset)
{
        struct perf_evsel *evsel;
        struct machine *machine;

        dump_event(session, event, file_offset, sample);

        evsel = perf_evlist__id2evsel(session->evlist, sample->id);
        if (evsel != NULL && event->header.type != PERF_RECORD_SAMPLE) {
                /*
                 * XXX We're leaving PERF_RECORD_SAMPLE unaccounted here
                 * because the tools right now may apply filters, discarding
                 * some of the samples. For consistency, in the future we
                 * should have something like nr_filtered_samples and remove
                 * the sample->period from total_sample_period, etc, KISS for
                 * now though.
                 *
                 * Also testing against NULL allows us to handle files without
                 * attr.sample_id_all and/or without PERF_SAMPLE_ID. In the
                 * future it'll probably be a good idea to restrict event
                 * processing via perf_session to files with both set.
                 */
                hists__inc_nr_events(&evsel->hists, event->header.type);
        }

        machine = perf_session__find_machine_for_cpumode(session, event);

        switch (event->header.type) {
        case PERF_RECORD_SAMPLE:
                dump_sample(session, event, sample);
                if (evsel == NULL) {
                        ++session->hists.stats.nr_unknown_id;
                        return -1;
                }
                return tool->sample(tool, event, sample, evsel, machine);
        case PERF_RECORD_MMAP:
                return tool->mmap(tool, event, sample, machine);
        case PERF_RECORD_COMM:
                return tool->comm(tool, event, sample, machine);
        case PERF_RECORD_FORK:
                return tool->fork(tool, event, sample, machine);
        case PERF_RECORD_EXIT:
                return tool->exit(tool, event, sample, machine);
        case PERF_RECORD_LOST:
                if (tool->lost == perf_event__process_lost)
                        session->hists.stats.total_lost += event->lost.lost;
                return tool->lost(tool, event, sample, machine);
        case PERF_RECORD_READ:
                return tool->read(tool, event, sample, evsel, machine);
        case PERF_RECORD_THROTTLE:
                return tool->throttle(tool, event, sample, machine);
        case PERF_RECORD_UNTHROTTLE:
                return tool->unthrottle(tool, event, sample, machine);
        default:
                ++session->hists.stats.nr_unknown_events;
                return -1;
        }
}

static int perf_session__preprocess_sample(struct perf_session *session,
                                           union perf_event *event, struct perf_sample *sample)
{
        if (event->header.type != PERF_RECORD_SAMPLE ||
            !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
                return 0;

        if (!ip_callchain__valid(sample->callchain, event)) {
                pr_debug("call-chain problem with event, skipping it.\n");
                ++session->hists.stats.nr_invalid_chains;
                session->hists.stats.total_invalid_chains += sample->period;
                return -EINVAL;
        }
        return 0;
}

static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
                                            struct perf_tool *tool, u64 file_offset)
{
        int err;

        dump_event(session, event, file_offset, NULL);

        /* These events are processed right away */
        switch (event->header.type) {
        case PERF_RECORD_HEADER_ATTR:
                err = tool->attr(event, &session->evlist);
                if (err == 0)
                        perf_session__update_sample_type(session);
                return err;
        case PERF_RECORD_HEADER_EVENT_TYPE:
                return tool->event_type(tool, event);
        case PERF_RECORD_HEADER_TRACING_DATA:
                /* setup for reading amidst mmap */
                lseek(session->fd, file_offset, SEEK_SET);
                return tool->tracing_data(event, session);
        case PERF_RECORD_HEADER_BUILD_ID:
                return tool->build_id(tool, event, session);
        case PERF_RECORD_FINISHED_ROUND:
                return tool->finished_round(tool, event, session);
        default:
                return -EINVAL;
        }
}

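/*
 * Per-event entry point: byte swap if the file endianness differs from
 * the host, account the event, hand user/synthetic events off directly
 * and, for kernel events, parse the sample and either queue it for time
 * ordering or deliver it right away.
 */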
static int perf_session__process_event(struct perf_session *session,
                                       union perf_event *event,
                                       struct perf_tool *tool,
                                       u64 file_offset)
{
        struct perf_sample sample;
        int ret;

        /* Validate the type before using it to index the swap table. */
        if (event->header.type >= PERF_RECORD_HEADER_MAX)
                return -EINVAL;

        if (session->header.needs_swap &&
            perf_event__swap_ops[event->header.type])
                perf_event__swap_ops[event->header.type](event);

        hists__inc_nr_events(&session->hists, event->header.type);

        if (event->header.type >= PERF_RECORD_USER_TYPE_START)
                return perf_session__process_user_event(session, event, tool, file_offset);

        /*
         * For all kernel events we get the sample data
         */
        ret = perf_session__parse_sample(session, event, &sample);
        if (ret)
                return ret;

        /* Preprocess sample records - precheck callchains */
        if (perf_session__preprocess_sample(session, event, &sample))
                return 0;

        if (tool->ordered_samples) {
                ret = perf_session_queue_event(session, event, &sample,
                                               file_offset);
                if (ret != -ETIME)
                        return ret;
        }

        return perf_session_deliver_event(session, event, &sample, tool,
                                          file_offset);
}

void perf_event_header__bswap(struct perf_event_header *self)
{
        self->type = bswap_32(self->type);
        self->misc = bswap_16(self->misc);
        self->size = bswap_16(self->size);
}

struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
{
        return machine__findnew_thread(&session->host_machine, pid);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
        struct thread *thread = perf_session__findnew(self, 0);

        if (thread == NULL || thread__set_comm(thread, "swapper")) {
                pr_err("problem inserting idle task.\n");
                thread = NULL;
        }

        return thread;
}

static void perf_session__warn_about_errors(const struct perf_session *session,
                                            const struct perf_tool *tool)
{
        if (tool->lost == perf_event__process_lost &&
            session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) {
                ui__warning("Processed %d events and lost %d chunks!\n\n"
                            "Check IO/CPU overload!\n\n",
                            session->hists.stats.nr_events[0],
                            session->hists.stats.nr_events[PERF_RECORD_LOST]);
        }

        if (session->hists.stats.nr_unknown_events != 0) {
                ui__warning("Found %u unknown events!\n\n"
                            "Is this an older tool processing a perf.data "
                            "file generated by a more recent tool?\n\n"
                            "If that is not the case, consider "
                            "reporting to linux-kernel@vger.kernel.org.\n\n",
                            session->hists.stats.nr_unknown_events);
        }

        if (session->hists.stats.nr_unknown_id != 0) {
                ui__warning("%u samples with id not present in the header\n",
                            session->hists.stats.nr_unknown_id);
        }

        if (session->hists.stats.nr_invalid_chains != 0) {
                ui__warning("Found invalid callchains!\n\n"
                            "%u out of %u events were discarded for this reason.\n\n"
                            "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
                            session->hists.stats.nr_invalid_chains,
                            session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
        }
}

#define session_done()  (*(volatile int *)(&session_done))
volatile int session_done;

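/*
 * Pipe mode: read one perf_event_header at a time from the fd, then the
 * payload it announces, and feed each event to the processing core.
 */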
static int __perf_session__process_pipe_events(struct perf_session *self,
                                               struct perf_tool *tool)
{
        union perf_event event;
        uint32_t size;
        int skip = 0;
        u64 head;
        int err;
        void *p;

        perf_tool__fill_defaults(tool);

        head = 0;
more:
        err = readn(self->fd, &event, sizeof(struct perf_event_header));
        if (err <= 0) {
                if (err == 0)
                        goto done;

                pr_err("failed to read event header\n");
                goto out_err;
        }

        if (self->header.needs_swap)
                perf_event_header__bswap(&event.header);

        size = event.header.size;
        if (size == 0)
                size = 8;

        p = &event;
        p += sizeof(struct perf_event_header);

        if (size - sizeof(struct perf_event_header)) {
                err = readn(self->fd, p, size - sizeof(struct perf_event_header));
                if (err <= 0) {
                        if (err == 0) {
                                pr_err("unexpected end of event stream\n");
                                goto done;
                        }

                        pr_err("failed to read event data\n");
                        goto out_err;
                }
        }

        if ((skip = perf_session__process_event(self, &event, tool, head)) < 0) {
                dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
                            head, event.header.size, event.header.type);
                /*
                 * Assume we lost track of the stream, check alignment, and
                 * increment a single u64 in the hope of catching up again 'soon'.
                 */
                if (unlikely(head & 7))
                        head &= ~7ULL;

                size = 8;
        }

        head += size;

        if (skip > 0)
                head += skip;

        if (!session_done())
                goto more;
done:
        err = 0;
out_err:
        perf_session__warn_about_errors(self, tool);
        perf_session_free_sample_buffers(self);
        return err;
}

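/*
 * Return the event at 'head' in the current mmap window, or NULL if the
 * header or the event itself crosses the end of the window, in which
 * case the caller remaps further into the file.
 */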
static union perf_event *
fetch_mmaped_event(struct perf_session *session,
                   u64 head, size_t mmap_size, char *buf)
{
        union perf_event *event;

        /*
         * Ensure we have enough space remaining to read
         * the size of the event in the headers.
         */
        if (head + sizeof(event->header) > mmap_size)
                return NULL;

        event = (union perf_event *)(buf + head);

        if (session->header.needs_swap)
                perf_event_header__bswap(&event->header);

        if (head + event->header.size > mmap_size)
                return NULL;

        return event;
}

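/*
 * Walk the on-disk data in mmap_window sized, page aligned slices,
 * remapping whenever an event crosses the end of the current slice, and
 * do a final flush of the ordered sample queue at end of file.
 */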
int __perf_session__process_events(struct perf_session *session,
                                   u64 data_offset, u64 data_size,
                                   u64 file_size, struct perf_tool *tool)
{
        u64 head, page_offset, file_offset, file_pos, progress_next;
        int err, mmap_prot, mmap_flags, map_idx = 0;
        size_t  page_size, mmap_size;
        char *buf, *mmaps[8];
        union perf_event *event;
        uint32_t size;

        perf_tool__fill_defaults(tool);

        page_size = sysconf(_SC_PAGESIZE);

        page_offset = page_size * (data_offset / page_size);
        file_offset = page_offset;
        head = data_offset - page_offset;

        if (data_offset + data_size < file_size)
                file_size = data_offset + data_size;

        progress_next = file_size / 16;

        mmap_size = session->mmap_window;
        if (mmap_size > file_size)
                mmap_size = file_size;

        memset(mmaps, 0, sizeof(mmaps));

        mmap_prot  = PROT_READ;
        mmap_flags = MAP_SHARED;

        if (session->header.needs_swap) {
                mmap_prot  |= PROT_WRITE;
                mmap_flags = MAP_PRIVATE;
        }
remap:
        buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
                   file_offset);
        if (buf == MAP_FAILED) {
                pr_err("failed to mmap file\n");
                err = -errno;
                goto out_err;
        }
        mmaps[map_idx] = buf;
        map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
        file_pos = file_offset + head;

more:
        event = fetch_mmaped_event(session, head, mmap_size, buf);
        if (!event) {
                if (mmaps[map_idx]) {
                        munmap(mmaps[map_idx], mmap_size);
                        mmaps[map_idx] = NULL;
                }

                page_offset = page_size * (head / page_size);
                file_offset += page_offset;
                head -= page_offset;
                goto remap;
        }

        size = event->header.size;

        if (size == 0 ||
            perf_session__process_event(session, event, tool, file_pos) < 0) {
                dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
                            file_offset + head, event->header.size,
                            event->header.type);
                /*
                 * Assume we lost track of the stream, check alignment, and
                 * increment a single u64 in the hope of catching up again 'soon'.
                 */
                if (unlikely(head & 7))
                        head &= ~7ULL;

                size = 8;
        }

        head += size;
        file_pos += size;

        if (file_pos >= progress_next) {
                progress_next += file_size / 16;
                ui_progress__update(file_pos, file_size,
                                    "Processing events...");
        }

        if (file_pos < file_size)
                goto more;

        err = 0;
        /* do the final flush for ordered samples */
        session->ordered_samples.next_flush = ULLONG_MAX;
        flush_sample_queue(session, tool);
out_err:
        perf_session__warn_about_errors(session, tool);
        perf_session_free_sample_buffers(session);
        return err;
}

int perf_session__process_events(struct perf_session *self,
                                 struct perf_tool *tool)
{
        int err;

        if (perf_session__register_idle_thread(self) == NULL)
                return -ENOMEM;

        if (!self->fd_pipe)
                err = __perf_session__process_events(self,
                                                     self->header.data_offset,
                                                     self->header.data_size,
                                                     self->size, tool);
        else
                err = __perf_session__process_pipe_events(self, tool);

        return err;
}

bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
        if (!(self->sample_type & PERF_SAMPLE_RAW)) {
                pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
                return false;
        }

        return true;
}

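/*
 * Record the kallsyms reference symbol (its name truncated at any ']')
 * and its address in every map type's kmap, for later use when
 * relocating kernel symbols.
 */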
int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
                                     const char *symbol_name, u64 addr)
{
        char *bracket;
        enum map_type i;
        struct ref_reloc_sym *ref;

        ref = zalloc(sizeof(struct ref_reloc_sym));
        if (ref == NULL)
                return -ENOMEM;

        ref->name = strdup(symbol_name);
        if (ref->name == NULL) {
                free(ref);
                return -ENOMEM;
        }

        bracket = strchr(ref->name, ']');
        if (bracket)
                *bracket = '\0';

        ref->addr = addr;

        for (i = 0; i < MAP__NR_TYPES; ++i) {
                struct kmap *kmap = map__kmap(maps[i]);
                kmap->ref_reloc_sym = ref;
        }

        return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
        return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
               __dsos__fprintf(&self->host_machine.user_dsos, fp) +
               machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
                                          bool with_hits)
{
        size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
        return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
        struct perf_evsel *pos;
        size_t ret = fprintf(fp, "Aggregated stats:\n");

        ret += hists__fprintf_nr_events(&session->hists, fp);

        list_for_each_entry(pos, &session->evlist->entries, node) {
                ret += fprintf(fp, "%s stats:\n", event_name(pos));
                ret += hists__fprintf_nr_events(&pos->hists, fp);
        }

        return ret;
}

size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
{
        /*
         * FIXME: Here we have to actually print all the machines in this
         * session, not just the host...
         */
        return machine__fprintf(&session->host_machine, fp);
}

void perf_session__remove_thread(struct perf_session *session,
                                 struct thread *th)
{
        /*
         * FIXME: This one makes no sense, we need to remove the thread from
         * the machine it belongs to, perf_session can have many machines, so
         * doing it always on ->host_machine is wrong.  Fix when auditing all
         * the 'perf kvm' code.
         */
        machine__remove_thread(&session->host_machine, th);
}

struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
                                              unsigned int type)
{
        struct perf_evsel *pos;

        list_for_each_entry(pos, &session->evlist->entries, node) {
                if (pos->attr.type == type)
                        return pos;
        }
        return NULL;
}

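/*
 * Resolve and print the ip of a sample, or its whole callchain when
 * callchains are enabled, optionally with symbol and DSO names.
 */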
void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
                          struct machine *machine, struct perf_evsel *evsel,
                          int print_sym, int print_dso)
{
        struct addr_location al;
        const char *symname, *dsoname;
        struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
        struct callchain_cursor_node *node;

        if (perf_event__preprocess_sample(event, machine, &al, sample,
                                          NULL) < 0) {
                error("problem processing %d event, skipping it.\n",
                        event->header.type);
                return;
        }

        if (symbol_conf.use_callchain && sample->callchain) {

                if (machine__resolve_callchain(machine, evsel, al.thread,
                                                sample->callchain, NULL) != 0) {
                        if (verbose)
                                error("Failed to resolve callchain. Skipping\n");
                        return;
                }
                callchain_cursor_commit(cursor);

                while (1) {
                        node = callchain_cursor_current(cursor);
                        if (!node)
                                break;

                        printf("\t%16" PRIx64, node->ip);
                        if (print_sym) {
                                if (node->sym && node->sym->name)
                                        symname = node->sym->name;
                                else
                                        symname = "";

                                printf(" %s", symname);
                        }
                        if (print_dso) {
                                if (node->map && node->map->dso && node->map->dso->name)
                                        dsoname = node->map->dso->name;
                                else
                                        dsoname = "";

                                printf(" (%s)", dsoname);
                        }
                        printf("\n");

                        callchain_cursor_advance(cursor);
                }

        } else {
                printf("%16" PRIx64, sample->ip);
                if (print_sym) {
                        if (al.sym && al.sym->name)
                                symname = al.sym->name;
                        else
                                symname = "";

                        printf(" %s", symname);
                }

                if (print_dso) {
                        if (al.map && al.map->dso && al.map->dso->name)
                                dsoname = al.map->dso->name;
                        else
                                dsoname = "";

                        printf(" (%s)", dsoname);
                }
        }
}

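/*
 * Parse cpu_list and set the corresponding bits in cpu_bitmap, failing
 * if the session's events don't carry PERF_SAMPLE_CPU or a requested
 * CPU exceeds MAX_NR_CPUS.
 */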
int perf_session__cpu_bitmap(struct perf_session *session,
                             const char *cpu_list, unsigned long *cpu_bitmap)
{
        int i;
        struct cpu_map *map;

        for (i = 0; i < PERF_TYPE_MAX; ++i) {
                struct perf_evsel *evsel;

                evsel = perf_session__find_first_evtype(session, i);
                if (!evsel)
                        continue;

                if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
                        pr_err("File does not contain CPU events. "
                               "Remove -c option to proceed.\n");
                        return -1;
                }
        }

        map = cpu_map__new(cpu_list);
        if (map == NULL) {
                pr_err("Invalid cpu_list\n");
                return -1;
        }

        for (i = 0; i < map->nr; i++) {
                int cpu = map->map[i];

                if (cpu >= MAX_NR_CPUS) {
                        pr_err("Requested CPU %d too large. "
                               "Consider raising MAX_NR_CPUS\n", cpu);
                        return -1;
                }

                set_bit(cpu, cpu_bitmap);
        }

        return 0;
}

void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
                                bool full)
{
        struct stat st;
        int ret;

        if (session == NULL || fp == NULL)
                return;

        ret = fstat(session->fd, &st);
        if (ret == -1)
                return;

        fprintf(fp, "# ========\n");
        fprintf(fp, "# captured on: %s", ctime(&st.st_ctime));
        perf_header__fprintf_info(session, fp, full);
        fprintf(fp, "# ========\n#\n");
}