/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <inttypes.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct intel_pt {
        struct auxtrace auxtrace;
        struct auxtrace_queues queues;
        struct auxtrace_heap heap;
        u32 auxtrace_type;
        struct perf_session *session;
        struct machine *machine;
        struct perf_evsel *switch_evsel;
        struct thread *unknown_thread;
        bool timeless_decoding;
        bool sampling_mode;
        bool snapshot_mode;
        bool per_cpu_mmaps;
        bool have_tsc;
        bool data_queued;
        bool est_tsc;
        bool sync_switch;
        bool mispred_all;
        int have_sched_switch;
        u32 pmu_type;
        u64 kernel_start;
        u64 switch_ip;
        u64 ptss_ip;

        struct perf_tsc_conversion tc;
        bool cap_user_time_zero;

        struct itrace_synth_opts synth_opts;

        bool sample_instructions;
        u64 instructions_sample_type;
        u64 instructions_sample_period;
        u64 instructions_id;

        bool sample_branches;
        u32 branches_filter;
        u64 branches_sample_type;
        u64 branches_id;

        bool sample_transactions;
        u64 transactions_sample_type;
        u64 transactions_id;

        bool synth_needs_swap;

        u64 tsc_bit;
        u64 mtc_bit;
        u64 mtc_freq_bits;
        u32 tsc_ctc_ratio_n;
        u32 tsc_ctc_ratio_d;
        u64 cyc_bit;
        u64 noretcomp_bit;
        unsigned max_non_turbo_ratio;

        unsigned long num_events;

        char *filter;
        struct addr_filters filts;
};

enum switch_state {
        INTEL_PT_SS_NOT_TRACING,
        INTEL_PT_SS_UNKNOWN,
        INTEL_PT_SS_TRACING,
        INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
        INTEL_PT_SS_EXPECTING_SWITCH_IP,
};

struct intel_pt_queue {
        struct intel_pt *pt;
        unsigned int queue_nr;
        struct auxtrace_buffer *buffer;
        void *decoder;
        const struct intel_pt_state *state;
        struct ip_callchain *chain;
        struct branch_stack *last_branch;
        struct branch_stack *last_branch_rb;
        size_t last_branch_pos;
        union perf_event *event_buf;
        bool on_heap;
        bool stop;
        bool step_through_buffers;
        bool use_buffer_pid_tid;
        pid_t pid, tid;
        int cpu;
        int switch_state;
        pid_t next_tid;
        struct thread *thread;
        bool exclude_kernel;
        bool have_sample;
        u64 time;
        u64 timestamp;
        u32 flags;
        u16 insn_len;
        u64 last_insn_cnt;
        char insn[INTEL_PT_INSN_BUF_SZ];
};

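/*
 * Hex-dump raw Intel PT data packet by packet for perf's dump mode,
 * annotating each packet with its decoded description where the packet
 * decoder recognises it, and flagging anything it cannot parse.
 */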
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
                          unsigned char *buf, size_t len)
{
        struct intel_pt_pkt packet;
        size_t pos = 0;
        int ret, pkt_len, i;
        char desc[INTEL_PT_PKT_DESC_MAX];
        const char *color = PERF_COLOR_BLUE;

        color_fprintf(stdout, color,
                      ". ... Intel Processor Trace data: size %zu bytes\n",
                      len);

        while (len) {
                ret = intel_pt_get_packet(buf, len, &packet);
                if (ret > 0)
                        pkt_len = ret;
                else
                        pkt_len = 1;
                printf(".");
                color_fprintf(stdout, color, "  %08x: ", pos);
                for (i = 0; i < pkt_len; i++)
                        color_fprintf(stdout, color, " %02x", buf[i]);
                for (; i < 16; i++)
                        color_fprintf(stdout, color, "   ");
                if (ret > 0) {
                        ret = intel_pt_pkt_desc(&packet, desc,
                                                INTEL_PT_PKT_DESC_MAX);
                        if (ret > 0)
                                color_fprintf(stdout, color, " %s\n", desc);
                } else {
                        color_fprintf(stdout, color, " Bad packet!\n");
                }
                pos += pkt_len;
                buf += pkt_len;
                len -= pkt_len;
        }
}

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
                                size_t len)
{
        printf(".\n");
        intel_pt_dump(pt, buf, len);
}

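/*
 * Snapshot buffers can overlap the previously queued buffer, so trim
 * buffer 'b' to the non-overlapping tail found by intel_pt_find_overlap()
 * before it is handed to the decoder.
 */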
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
                                   struct auxtrace_buffer *b)
{
        void *start;

        start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
                                      pt->have_tsc);
        if (!start)
                return -EINVAL;
        b->use_size = b->data + b->size - start;
        b->use_data = start;
        return 0;
}

static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
                                        struct auxtrace_queue *queue,
                                        struct auxtrace_buffer *buffer)
{
        if (queue->cpu == -1 && buffer->cpu != -1)
                ptq->cpu = buffer->cpu;

        ptq->pid = buffer->pid;
        ptq->tid = buffer->tid;

        intel_pt_log("queue %u cpu %d pid %d tid %d\n",
                     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

        thread__zput(ptq->thread);

        if (ptq->tid != -1) {
                if (ptq->pid != -1)
                        ptq->thread = machine__findnew_thread(ptq->pt->machine,
                                                              ptq->pid,
                                                              ptq->tid);
                else
                        ptq->thread = machine__find_thread(ptq->pt->machine, -1,
                                                           ptq->tid);
        }
}

/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
        struct intel_pt_queue *ptq = data;
        struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
        struct auxtrace_queue *queue;

        if (ptq->stop) {
                b->len = 0;
                return 0;
        }

        queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
next:
        buffer = auxtrace_buffer__next(queue, buffer);
        if (!buffer) {
                if (old_buffer)
                        auxtrace_buffer__drop_data(old_buffer);
                b->len = 0;
                return 0;
        }

        ptq->buffer = buffer;

        if (!buffer->data) {
                int fd = perf_data_file__fd(ptq->pt->session->file);

                buffer->data = auxtrace_buffer__get_data(buffer, fd);
                if (!buffer->data)
                        return -ENOMEM;
        }

        if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
            intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
                return -ENOMEM;

        if (buffer->use_data) {
                b->len = buffer->use_size;
                b->buf = buffer->use_data;
        } else {
                b->len = buffer->size;
                b->buf = buffer->data;
        }
        b->ref_timestamp = buffer->reference;

        /*
         * If in snapshot mode and the buffer has no usable data, get next
         * buffer and again check overlap against old_buffer.
         */
        if (ptq->pt->snapshot_mode && !b->len)
                goto next;

        if (old_buffer)
                auxtrace_buffer__drop_data(old_buffer);

        if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
                                                      !buffer->consecutive)) {
                b->consecutive = false;
                b->trace_nr = buffer->buffer_nr + 1;
        } else {
                b->consecutive = true;
        }

        if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
                                        ptq->tid != buffer->tid))
                intel_pt_use_buffer_pid_tid(ptq, queue, buffer);

        if (ptq->step_through_buffers)
                ptq->stop = true;

        if (!b->len)
                return intel_pt_get_trace(b, data);

        return 0;
}

struct intel_pt_cache_entry {
        struct auxtrace_cache_entry     entry;
        u64                             insn_cnt;
        u64                             byte_cnt;
        enum intel_pt_insn_op           op;
        enum intel_pt_insn_branch       branch;
        int                             length;
        int32_t                         rel;
        char                            insn[INTEL_PT_INSN_BUF_SZ];
};

static int intel_pt_config_div(const char *var, const char *value, void *data)
{
        int *d = data;
        long val;

        if (!strcmp(var, "intel-pt.cache-divisor")) {
                val = strtol(value, NULL, 0);
                if (val > 0 && val <= INT_MAX)
                        *d = val;
        }

        return 0;
}

static int intel_pt_cache_divisor(void)
{
        static int d;

        if (d)
                return d;

        perf_config(intel_pt_config_div, &d);

        if (!d)
                d = 64;

        return d;
}

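/*
 * Size of the per-DSO instruction cache, as a number of hash bits, scaled
 * to the DSO size divided by the configurable intel-pt.cache-divisor
 * (default 64), clamped between 2^10 and 2^21 buckets.
 */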
static unsigned int intel_pt_cache_size(struct dso *dso,
                                        struct machine *machine)
{
        off_t size;

        size = dso__data_size(dso, machine);
        size /= intel_pt_cache_divisor();
        if (size < 1000)
                return 10;
        if (size > (1 << 21))
                return 21;
        return 32 - __builtin_clz(size);
}

static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
                                             struct machine *machine)
{
        struct auxtrace_cache *c;
        unsigned int bits;

        if (dso->auxtrace_cache)
                return dso->auxtrace_cache;

        bits = intel_pt_cache_size(dso, machine);

        /* Ignoring cache creation failure */
        c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

        dso->auxtrace_cache = c;

        return c;
}

static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
                              u64 offset, u64 insn_cnt, u64 byte_cnt,
                              struct intel_pt_insn *intel_pt_insn)
{
        struct auxtrace_cache *c = intel_pt_cache(dso, machine);
        struct intel_pt_cache_entry *e;
        int err;

        if (!c)
                return -ENOMEM;

        e = auxtrace_cache__alloc_entry(c);
        if (!e)
                return -ENOMEM;

        e->insn_cnt = insn_cnt;
        e->byte_cnt = byte_cnt;
        e->op = intel_pt_insn->op;
        e->branch = intel_pt_insn->branch;
        e->length = intel_pt_insn->length;
        e->rel = intel_pt_insn->rel;
        memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);

        err = auxtrace_cache__add(c, offset, &e->entry);
        if (err)
                auxtrace_cache__free_entry(c, e);

        return err;
}

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
{
        struct auxtrace_cache *c = intel_pt_cache(dso, machine);

        if (!c)
                return NULL;

        return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}

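/*
 * Decoder callback: walk object code from *ip until the next branch, or
 * until to_ip or max_insn_cnt is reached, decoding instructions with the
 * Intel PT instruction decoder.  Results are cached per DSO so the same
 * stretch of code does not have to be re-read and re-decoded repeatedly.
 */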
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
                                   uint64_t *insn_cnt_ptr, uint64_t *ip,
                                   uint64_t to_ip, uint64_t max_insn_cnt,
                                   void *data)
{
        struct intel_pt_queue *ptq = data;
        struct machine *machine = ptq->pt->machine;
        struct thread *thread;
        struct addr_location al;
        unsigned char buf[INTEL_PT_INSN_BUF_SZ];
        ssize_t len;
        int x86_64;
        u8 cpumode;
        u64 offset, start_offset, start_ip;
        u64 insn_cnt = 0;
        bool one_map = true;

        intel_pt_insn->length = 0;

        if (to_ip && *ip == to_ip)
                goto out_no_cache;

        if (*ip >= ptq->pt->kernel_start)
                cpumode = PERF_RECORD_MISC_KERNEL;
        else
                cpumode = PERF_RECORD_MISC_USER;

        thread = ptq->thread;
        if (!thread) {
                if (cpumode != PERF_RECORD_MISC_KERNEL)
                        return -EINVAL;
                thread = ptq->pt->unknown_thread;
        }

        while (1) {
                thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
                if (!al.map || !al.map->dso)
                        return -EINVAL;

                if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
                    dso__data_status_seen(al.map->dso,
                                          DSO_DATA_STATUS_SEEN_ITRACE))
                        return -ENOENT;

                offset = al.map->map_ip(al.map, *ip);

                if (!to_ip && one_map) {
                        struct intel_pt_cache_entry *e;

                        e = intel_pt_cache_lookup(al.map->dso, machine, offset);
                        if (e &&
                            (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
                                *insn_cnt_ptr = e->insn_cnt;
                                *ip += e->byte_cnt;
                                intel_pt_insn->op = e->op;
                                intel_pt_insn->branch = e->branch;
                                intel_pt_insn->length = e->length;
                                intel_pt_insn->rel = e->rel;
                                memcpy(intel_pt_insn->buf, e->insn,
                                       INTEL_PT_INSN_BUF_SZ);
                                intel_pt_log_insn_no_data(intel_pt_insn, *ip);
                                return 0;
                        }
                }

                start_offset = offset;
                start_ip = *ip;

                /* Load maps to ensure dso->is_64_bit has been updated */
                map__load(al.map);

                x86_64 = al.map->dso->is_64_bit;

                while (1) {
                        len = dso__data_read_offset(al.map->dso, machine,
                                                    offset, buf,
                                                    INTEL_PT_INSN_BUF_SZ);
                        if (len <= 0)
                                return -EINVAL;

                        if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
                                return -EINVAL;

                        intel_pt_log_insn(intel_pt_insn, *ip);

                        insn_cnt += 1;

                        if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
                                goto out;

                        if (max_insn_cnt && insn_cnt >= max_insn_cnt)
                                goto out_no_cache;

                        *ip += intel_pt_insn->length;

                        if (to_ip && *ip == to_ip)
                                goto out_no_cache;

                        if (*ip >= al.map->end)
                                break;

                        offset += intel_pt_insn->length;
                }
                one_map = false;
        }
out:
        *insn_cnt_ptr = insn_cnt;

        if (!one_map)
                goto out_no_cache;

        /*
         * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
         * entries.
         */
        if (to_ip) {
                struct intel_pt_cache_entry *e;

                e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
                if (e)
                        return 0;
        }

        /* Ignore cache errors */
        intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
                           *ip - start_ip, intel_pt_insn);

        return 0;

out_no_cache:
        *insn_cnt_ptr = insn_cnt;
        return 0;
}

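/*
 * Decide whether a TIP.PGD target address falls outside all address filter
 * regions, or inside a trace-stop region, in which case the decoder should
 * regard tracing as having stopped at that point.
 */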
static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
                                  uint64_t offset, const char *filename)
{
        struct addr_filter *filt;
        bool have_filter   = false;
        bool hit_tracestop = false;
        bool hit_filter    = false;

        list_for_each_entry(filt, &pt->filts.head, list) {
                if (filt->start)
                        have_filter = true;

                if ((filename && !filt->filename) ||
                    (!filename && filt->filename) ||
                    (filename && strcmp(filename, filt->filename)))
                        continue;

                if (!(offset >= filt->addr && offset < filt->addr + filt->size))
                        continue;

                intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
                             ip, offset, filename ? filename : "[kernel]",
                             filt->start ? "filter" : "stop",
                             filt->addr, filt->size);

                if (filt->start)
                        hit_filter = true;
                else
                        hit_tracestop = true;
        }

        if (!hit_tracestop && !hit_filter)
                intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
                             ip, offset, filename ? filename : "[kernel]");

        return hit_tracestop || (have_filter && !hit_filter);
}

static int __intel_pt_pgd_ip(uint64_t ip, void *data)
{
        struct intel_pt_queue *ptq = data;
        struct thread *thread;
        struct addr_location al;
        u8 cpumode;
        u64 offset;

        if (ip >= ptq->pt->kernel_start)
                return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);

        cpumode = PERF_RECORD_MISC_USER;

        thread = ptq->thread;
        if (!thread)
                return -EINVAL;

        thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
        if (!al.map || !al.map->dso)
                return -EINVAL;

        offset = al.map->map_ip(al.map, ip);

        return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
                                     al.map->dso->long_name);
}

static bool intel_pt_pgd_ip(uint64_t ip, void *data)
{
        return __intel_pt_pgd_ip(ip, data) > 0;
}

static bool intel_pt_get_config(struct intel_pt *pt,
                                struct perf_event_attr *attr, u64 *config)
{
        if (attr->type == pt->pmu_type) {
                if (config)
                        *config = attr->config;
                return true;
        }

        return false;
}

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
                    !evsel->attr.exclude_kernel)
                        return false;
        }
        return true;
}

static bool intel_pt_return_compression(struct intel_pt *pt)
{
        struct perf_evsel *evsel;
        u64 config;

        if (!pt->noretcomp_bit)
                return true;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, &config) &&
                    (config & pt->noretcomp_bit))
                        return false;
        }
        return true;
}

static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
        struct perf_evsel *evsel;
        unsigned int shift;
        u64 config;

        if (!pt->mtc_freq_bits)
                return 0;

        for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
                config >>= 1;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, &config))
                        return (config & pt->mtc_freq_bits) >> shift;
        }
        return 0;
}

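/*
 * "Timeless" decoding means decoding without timestamps: used when TSC was
 * not recorded or the events do not carry sample times, in which case the
 * data is processed queue by queue instead of interleaved by timestamp.
 */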
static bool intel_pt_timeless_decoding(struct intel_pt *pt)
{
        struct perf_evsel *evsel;
        bool timeless_decoding = true;
        u64 config;

        if (!pt->tsc_bit || !pt->cap_user_time_zero)
                return true;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
                        return true;
                if (intel_pt_get_config(pt, &evsel->attr, &config)) {
                        if (config & pt->tsc_bit)
                                timeless_decoding = false;
                        else
                                return true;
                }
        }
        return timeless_decoding;
}

static bool intel_pt_tracing_kernel(struct intel_pt *pt)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
                    !evsel->attr.exclude_kernel)
                        return true;
        }
        return false;
}

static bool intel_pt_have_tsc(struct intel_pt *pt)
{
        struct perf_evsel *evsel;
        bool have_tsc = false;
        u64 config;

        if (!pt->tsc_bit)
                return false;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, &config)) {
                        if (config & pt->tsc_bit)
                                have_tsc = true;
                        else
                                return false;
                }
        }
        return have_tsc;
}

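/*
 * Convert nanoseconds to TSC ticks by inverting the perf time conversion:
 * ticks = (ns << time_shift) / time_mult, computed as separate quotient and
 * remainder terms to avoid overflowing 64 bits.
 */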
static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
        u64 quot, rem;

        quot = ns / pt->tc.time_mult;
        rem  = ns % pt->tc.time_mult;
        return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
                pt->tc.time_mult;
}

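/*
 * Allocate and set up a decode queue: per-queue sample buffers (callchain,
 * last branch ring buffer, event buffer) plus a decoder instance configured
 * from the synthesis options (period, return compression, MTC period,
 * TSC/CTC ratio and, when address filters are present, the PGD IP callback).
 */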
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
                                                   unsigned int queue_nr)
{
        struct intel_pt_params params = { .get_trace = 0, };
        struct intel_pt_queue *ptq;

        ptq = zalloc(sizeof(struct intel_pt_queue));
        if (!ptq)
                return NULL;

        if (pt->synth_opts.callchain) {
                size_t sz = sizeof(struct ip_callchain);

                sz += pt->synth_opts.callchain_sz * sizeof(u64);
                ptq->chain = zalloc(sz);
                if (!ptq->chain)
                        goto out_free;
        }

        if (pt->synth_opts.last_branch) {
                size_t sz = sizeof(struct branch_stack);

                sz += pt->synth_opts.last_branch_sz *
                      sizeof(struct branch_entry);
                ptq->last_branch = zalloc(sz);
                if (!ptq->last_branch)
                        goto out_free;
                ptq->last_branch_rb = zalloc(sz);
                if (!ptq->last_branch_rb)
                        goto out_free;
        }

        ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
        if (!ptq->event_buf)
                goto out_free;

        ptq->pt = pt;
        ptq->queue_nr = queue_nr;
        ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
        ptq->pid = -1;
        ptq->tid = -1;
        ptq->cpu = -1;
        ptq->next_tid = -1;

        params.get_trace = intel_pt_get_trace;
        params.walk_insn = intel_pt_walk_next_insn;
        params.data = ptq;
        params.return_compression = intel_pt_return_compression(pt);
        params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
        params.mtc_period = intel_pt_mtc_period(pt);
        params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
        params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;

        if (pt->filts.cnt > 0)
                params.pgd_ip = intel_pt_pgd_ip;

        if (pt->synth_opts.instructions) {
                if (pt->synth_opts.period) {
                        switch (pt->synth_opts.period_type) {
                        case PERF_ITRACE_PERIOD_INSTRUCTIONS:
                                params.period_type =
                                                INTEL_PT_PERIOD_INSTRUCTIONS;
                                params.period = pt->synth_opts.period;
                                break;
                        case PERF_ITRACE_PERIOD_TICKS:
                                params.period_type = INTEL_PT_PERIOD_TICKS;
                                params.period = pt->synth_opts.period;
                                break;
                        case PERF_ITRACE_PERIOD_NANOSECS:
                                params.period_type = INTEL_PT_PERIOD_TICKS;
                                params.period = intel_pt_ns_to_ticks(pt,
                                                        pt->synth_opts.period);
                                break;
                        default:
                                break;
                        }
                }

                if (!params.period) {
                        params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
                        params.period = 1;
                }
        }

        ptq->decoder = intel_pt_decoder_new(&params);
        if (!ptq->decoder)
                goto out_free;

        return ptq;

out_free:
        zfree(&ptq->event_buf);
        zfree(&ptq->last_branch);
        zfree(&ptq->last_branch_rb);
        zfree(&ptq->chain);
        free(ptq);
        return NULL;
}

static void intel_pt_free_queue(void *priv)
{
        struct intel_pt_queue *ptq = priv;

        if (!ptq)
                return;
        thread__zput(ptq->thread);
        intel_pt_decoder_free(ptq->decoder);
        zfree(&ptq->event_buf);
        zfree(&ptq->last_branch);
        zfree(&ptq->last_branch_rb);
        zfree(&ptq->chain);
        free(ptq);
}

static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
                                     struct auxtrace_queue *queue)
{
        struct intel_pt_queue *ptq = queue->priv;

        if (queue->tid == -1 || pt->have_sched_switch) {
                ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
                thread__zput(ptq->thread);
        }

        if (!ptq->thread && ptq->tid != -1)
                ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

        if (ptq->thread) {
                ptq->pid = ptq->thread->pid_;
                if (queue->cpu == -1)
                        ptq->cpu = ptq->thread->cpu;
        }
}

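/*
 * Translate the decoder state flags into perf branch sample flags (call,
 * async/interrupt, TX abort, trace begin/end) and capture the instruction
 * length and bytes for branch samples.
 */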
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
        if (ptq->state->flags & INTEL_PT_ABORT_TX) {
                ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
        } else if (ptq->state->flags & INTEL_PT_ASYNC) {
                if (ptq->state->to_ip)
                        ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
                                     PERF_IP_FLAG_ASYNC |
                                     PERF_IP_FLAG_INTERRUPT;
                else
                        ptq->flags = PERF_IP_FLAG_BRANCH |
                                     PERF_IP_FLAG_TRACE_END;
                ptq->insn_len = 0;
        } else {
                if (ptq->state->from_ip)
                        ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
                else
                        ptq->flags = PERF_IP_FLAG_BRANCH |
                                     PERF_IP_FLAG_TRACE_BEGIN;
                if (ptq->state->flags & INTEL_PT_IN_TX)
                        ptq->flags |= PERF_IP_FLAG_IN_TX;
                ptq->insn_len = ptq->state->insn_len;
                memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
        }
}

static int intel_pt_setup_queue(struct intel_pt *pt,
                                struct auxtrace_queue *queue,
                                unsigned int queue_nr)
{
        struct intel_pt_queue *ptq = queue->priv;

        if (list_empty(&queue->head))
                return 0;

        if (!ptq) {
                ptq = intel_pt_alloc_queue(pt, queue_nr);
                if (!ptq)
                        return -ENOMEM;
                queue->priv = ptq;

                if (queue->cpu != -1)
                        ptq->cpu = queue->cpu;
                ptq->tid = queue->tid;

                if (pt->sampling_mode) {
                        if (pt->timeless_decoding)
                                ptq->step_through_buffers = true;
                        if (pt->timeless_decoding || !pt->have_sched_switch)
                                ptq->use_buffer_pid_tid = true;
                }
        }

        if (!ptq->on_heap &&
            (!pt->sync_switch ||
             ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
                const struct intel_pt_state *state;
                int ret;

                if (pt->timeless_decoding)
                        return 0;

                intel_pt_log("queue %u getting timestamp\n", queue_nr);
                intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
                             queue_nr, ptq->cpu, ptq->pid, ptq->tid);
                while (1) {
                        state = intel_pt_decode(ptq->decoder);
                        if (state->err) {
                                if (state->err == INTEL_PT_ERR_NODATA) {
                                        intel_pt_log("queue %u has no timestamp\n",
                                                     queue_nr);
                                        return 0;
                                }
                                continue;
                        }
                        if (state->timestamp)
                                break;
                }

                ptq->timestamp = state->timestamp;
                intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
                             queue_nr, ptq->timestamp);
                ptq->state = state;
                ptq->have_sample = true;
                intel_pt_sample_flags(ptq);
                ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
                if (ret)
                        return ret;
                ptq->on_heap = true;
        }

        return 0;
}

static int intel_pt_setup_queues(struct intel_pt *pt)
{
        unsigned int i;
        int ret;

        for (i = 0; i < pt->queues.nr_queues; i++) {
                ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
                if (ret)
                        return ret;
        }
        return 0;
}
static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
{
        struct branch_stack *bs_src = ptq->last_branch_rb;
        struct branch_stack *bs_dst = ptq->last_branch;
        size_t nr = 0;

        bs_dst->nr = bs_src->nr;

        if (!bs_src->nr)
                return;

        nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
        memcpy(&bs_dst->entries[0],
               &bs_src->entries[ptq->last_branch_pos],
               sizeof(struct branch_entry) * nr);

        if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
                memcpy(&bs_dst->entries[nr],
                       &bs_src->entries[0],
                       sizeof(struct branch_entry) * ptq->last_branch_pos);
        }
}

static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
{
        ptq->last_branch_pos = 0;
        ptq->last_branch_rb->nr = 0;
}

static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
{
        const struct intel_pt_state *state = ptq->state;
        struct branch_stack *bs = ptq->last_branch_rb;
        struct branch_entry *be;

        if (!ptq->last_branch_pos)
                ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;

        ptq->last_branch_pos -= 1;

        be              = &bs->entries[ptq->last_branch_pos];
        be->from        = state->from_ip;
        be->to          = state->to_ip;
        be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
        be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
        /* No support for mispredict */
        be->flags.mispred = ptq->pt->mispred_all;

        if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
                bs->nr += 1;
}

static int intel_pt_inject_event(union perf_event *event,
                                 struct perf_sample *sample, u64 type,
                                 bool swapped)
{
        event->header.size = perf_event__sample_event_size(sample, type, 0);
        return perf_event__synthesize_sample(event, type, 0, sample, swapped);
}

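/*
 * Synthesize a perf branch sample from the current decoder state, honouring
 * the branch filter and initial-skip options, and optionally writing the
 * sample data back into the event for perf inject.
 */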
static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
        int ret;
        struct intel_pt *pt = ptq->pt;
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };
        struct dummy_branch_stack {
                u64                     nr;
                struct branch_entry     entries;
        } dummy_bs;

        if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
                return 0;

        if (pt->synth_opts.initial_skip &&
            pt->num_events++ < pt->synth_opts.initial_skip)
                return 0;

        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);

        if (!pt->timeless_decoding)
                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

        sample.cpumode = PERF_RECORD_MISC_USER;
        sample.ip = ptq->state->from_ip;
        sample.pid = ptq->pid;
        sample.tid = ptq->tid;
        sample.addr = ptq->state->to_ip;
        sample.id = ptq->pt->branches_id;
        sample.stream_id = ptq->pt->branches_id;
        sample.period = 1;
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;
        memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);

        /*
         * perf report cannot handle events without a branch stack when using
         * SORT_MODE__BRANCH so make a dummy one.
         */
        if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
                dummy_bs = (struct dummy_branch_stack){
                        .nr = 1,
                        .entries = {
                                .from = sample.ip,
                                .to = sample.addr,
                        },
                };
                sample.branch_stack = (struct branch_stack *)&dummy_bs;
        }

        if (pt->synth_opts.inject) {
                ret = intel_pt_inject_event(event, &sample,
                                            pt->branches_sample_type,
                                            pt->synth_needs_swap);
                if (ret)
                        return ret;
        }

        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
        if (ret)
                pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
                       ret);

        return ret;
}

static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
        int ret;
        struct intel_pt *pt = ptq->pt;
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };

        if (pt->synth_opts.initial_skip &&
            pt->num_events++ < pt->synth_opts.initial_skip)
                return 0;

        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);

        if (!pt->timeless_decoding)
                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

        sample.cpumode = PERF_RECORD_MISC_USER;
        sample.ip = ptq->state->from_ip;
        sample.pid = ptq->pid;
        sample.tid = ptq->tid;
        sample.addr = ptq->state->to_ip;
        sample.id = ptq->pt->instructions_id;
        sample.stream_id = ptq->pt->instructions_id;
        sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;
        memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);

        ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

        if (pt->synth_opts.callchain) {
                thread_stack__sample(ptq->thread, ptq->chain,
                                     pt->synth_opts.callchain_sz, sample.ip);
                sample.callchain = ptq->chain;
        }

        if (pt->synth_opts.last_branch) {
                intel_pt_copy_last_branch_rb(ptq);
                sample.branch_stack = ptq->last_branch;
        }

        if (pt->synth_opts.inject) {
                ret = intel_pt_inject_event(event, &sample,
                                            pt->instructions_sample_type,
                                            pt->synth_needs_swap);
                if (ret)
                        return ret;
        }

        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
        if (ret)
                pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
                       ret);

        if (pt->synth_opts.last_branch)
                intel_pt_reset_last_branch_rb(ptq);

        return ret;
}

static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
        int ret;
        struct intel_pt *pt = ptq->pt;
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };

        if (pt->synth_opts.initial_skip &&
            pt->num_events++ < pt->synth_opts.initial_skip)
                return 0;

        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);

        if (!pt->timeless_decoding)
                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

        sample.cpumode = PERF_RECORD_MISC_USER;
        sample.ip = ptq->state->from_ip;
        sample.pid = ptq->pid;
        sample.tid = ptq->tid;
        sample.addr = ptq->state->to_ip;
        sample.id = ptq->pt->transactions_id;
        sample.stream_id = ptq->pt->transactions_id;
        sample.period = 1;
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;
        memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);

        if (pt->synth_opts.callchain) {
                thread_stack__sample(ptq->thread, ptq->chain,
                                     pt->synth_opts.callchain_sz, sample.ip);
                sample.callchain = ptq->chain;
        }

        if (pt->synth_opts.last_branch) {
                intel_pt_copy_last_branch_rb(ptq);
                sample.branch_stack = ptq->last_branch;
        }

        if (pt->synth_opts.inject) {
                ret = intel_pt_inject_event(event, &sample,
                                            pt->transactions_sample_type,
                                            pt->synth_needs_swap);
                if (ret)
                        return ret;
        }

        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
        if (ret)
                pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
                       ret);

        if (pt->synth_opts.last_branch)
                intel_pt_reset_last_branch_rb(ptq);

        return ret;
}

static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
                                pid_t pid, pid_t tid, u64 ip)
{
        union perf_event event;
        char msg[MAX_AUXTRACE_ERROR_MSG];
        int err;

        intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);

        auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
                             code, cpu, pid, tid, ip, msg);

        err = perf_session__deliver_synth_event(pt->session, &event, NULL);
        if (err)
                pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
                       err);

        return err;
}

static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
{
        struct auxtrace_queue *queue;
        pid_t tid = ptq->next_tid;
        int err;

        if (tid == -1)
                return 0;

        intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);

        err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);

        queue = &pt->queues.queue_array[ptq->queue_nr];
        intel_pt_set_pid_tid_cpu(pt, queue);

        ptq->next_tid = -1;

        return err;
}

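/*
 * A branch to the kernel's __switch_to address that is not conditional,
 * asynchronous, an interrupt or a TX abort indicates a context switch that
 * the decoder can synchronise with.
 */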
static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
{
        struct intel_pt *pt = ptq->pt;

        return ip == pt->switch_ip &&
               (ptq->flags & PERF_IP_FLAG_BRANCH) &&
               !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
                               PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
}

static int intel_pt_sample(struct intel_pt_queue *ptq)
{
        const struct intel_pt_state *state = ptq->state;
        struct intel_pt *pt = ptq->pt;
        int err;

        if (!ptq->have_sample)
                return 0;

        ptq->have_sample = false;

        if (pt->sample_instructions &&
            (state->type & INTEL_PT_INSTRUCTION) &&
            (!pt->synth_opts.initial_skip ||
             pt->num_events++ >= pt->synth_opts.initial_skip)) {
                err = intel_pt_synth_instruction_sample(ptq);
                if (err)
                        return err;
        }

        if (pt->sample_transactions &&
            (state->type & INTEL_PT_TRANSACTION) &&
            (!pt->synth_opts.initial_skip ||
             pt->num_events++ >= pt->synth_opts.initial_skip)) {
                err = intel_pt_synth_transaction_sample(ptq);
                if (err)
                        return err;
        }

        if (!(state->type & INTEL_PT_BRANCH))
                return 0;

        if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
                thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
                                    state->to_ip, ptq->insn_len,
                                    state->trace_nr);
        else
                thread_stack__set_trace_nr(ptq->thread, state->trace_nr);

        if (pt->sample_branches) {
                err = intel_pt_synth_branch_sample(ptq);
                if (err)
                        return err;
        }

        if (pt->synth_opts.last_branch)
                intel_pt_update_last_branch_rb(ptq);

        if (!pt->sync_switch)
                return 0;

        if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
                switch (ptq->switch_state) {
                case INTEL_PT_SS_UNKNOWN:
                case INTEL_PT_SS_EXPECTING_SWITCH_IP:
                        err = intel_pt_next_tid(pt, ptq);
                        if (err)
                                return err;
                        ptq->switch_state = INTEL_PT_SS_TRACING;
                        break;
                default:
                        ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
                        return 1;
                }
        } else if (!state->to_ip) {
                ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
        } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
                ptq->switch_state = INTEL_PT_SS_UNKNOWN;
        } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
                   state->to_ip == pt->ptss_ip &&
                   (ptq->flags & PERF_IP_FLAG_CALL)) {
                ptq->switch_state = INTEL_PT_SS_TRACING;
        }

        return 0;
}

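/*
 * Find the kernel addresses used to detect context switches: __switch_to
 * for the switch itself and, for ptss_ip, either the sched_switch
 * tracepoint handler or __perf_event_task_sched_out, depending on how
 * sched_switch information was recorded.
 */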
static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
{
        struct machine *machine = pt->machine;
        struct map *map;
        struct symbol *sym, *start;
        u64 ip, switch_ip = 0;
        const char *ptss;

        if (ptss_ip)
                *ptss_ip = 0;

        map = machine__kernel_map(machine);
        if (!map)
                return 0;

        if (map__load(map))
                return 0;

        start = dso__first_symbol(map->dso, MAP__FUNCTION);

        for (sym = start; sym; sym = dso__next_symbol(sym)) {
                if (sym->binding == STB_GLOBAL &&
                    !strcmp(sym->name, "__switch_to")) {
                        ip = map->unmap_ip(map, sym->start);
                        if (ip >= map->start && ip < map->end) {
                                switch_ip = ip;
                                break;
                        }
                }
        }

        if (!switch_ip || !ptss_ip)
                return 0;

        if (pt->have_sched_switch == 1)
                ptss = "perf_trace_sched_switch";
        else
                ptss = "__perf_event_task_sched_out";

        for (sym = start; sym; sym = dso__next_symbol(sym)) {
                if (!strcmp(sym->name, ptss)) {
                        ip = map->unmap_ip(map, sym->start);
                        if (ip >= map->start && ip < map->end) {
                                *ptss_ip = ip;
                                break;
                        }
                }
        }

        return switch_ip;
}

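/*
 * Decode one queue until its timestamp passes *timestamp (for interleaved
 * decoding) or an error occurs.  Decode errors are reported as synthesized
 * error events; estimated timestamps are used on return to user space and
 * in the unknown switch state, where the real TSC may not yet be known.
 */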
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
        const struct intel_pt_state *state = ptq->state;
        struct intel_pt *pt = ptq->pt;
        int err;

        if (!pt->kernel_start) {
                pt->kernel_start = machine__kernel_start(pt->machine);
                if (pt->per_cpu_mmaps &&
                    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
                    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
                    !pt->sampling_mode) {
                        pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
                        if (pt->switch_ip) {
                                intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
                                             pt->switch_ip, pt->ptss_ip);
                                pt->sync_switch = true;
                        }
                }
        }

        intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
                     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
        while (1) {
                err = intel_pt_sample(ptq);
                if (err)
                        return err;

                state = intel_pt_decode(ptq->decoder);
                if (state->err) {
                        if (state->err == INTEL_PT_ERR_NODATA)
                                return 1;
                        if (pt->sync_switch &&
                            state->from_ip >= pt->kernel_start) {
                                pt->sync_switch = false;
                                intel_pt_next_tid(pt, ptq);
                        }
                        if (pt->synth_opts.errors) {
                                err = intel_pt_synth_error(pt, state->err,
                                                           ptq->cpu, ptq->pid,
                                                           ptq->tid,
                                                           state->from_ip);
                                if (err)
                                        return err;
                        }
                        continue;
                }

                ptq->state = state;
                ptq->have_sample = true;
                intel_pt_sample_flags(ptq);

                /* Use estimated TSC upon return to user space */
                if (pt->est_tsc &&
                    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
                    state->to_ip && state->to_ip < pt->kernel_start) {
                        intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
                                     state->timestamp, state->est_timestamp);
                        ptq->timestamp = state->est_timestamp;
                /* Use estimated TSC in unknown switch state */
                } else if (pt->sync_switch &&
                           ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
                           intel_pt_is_switch_ip(ptq, state->to_ip) &&
                           ptq->next_tid == -1) {
                        intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
                                     state->timestamp, state->est_timestamp);
                        ptq->timestamp = state->est_timestamp;
                } else if (state->timestamp > ptq->timestamp) {
                        ptq->timestamp = state->timestamp;
                }

                if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
                        *timestamp = ptq->timestamp;
                        return 0;
                }
        }
        return 0;
}

static inline int intel_pt_update_queues(struct intel_pt *pt)
{
        if (pt->queues.new_data) {
                pt->queues.new_data = false;
                return intel_pt_setup_queues(pt);
        }
        return 0;
}

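/*
 * Process queues in timestamp order using a min-heap keyed on each queue's
 * next timestamp: pop the earliest queue, decode it up to the given
 * timestamp (or the next queue's timestamp, whichever is sooner), then push
 * it back with its new position.
 */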
1515 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1516 {
1517         unsigned int queue_nr;
1518         u64 ts;
1519         int ret;
1520
1521         while (1) {
1522                 struct auxtrace_queue *queue;
1523                 struct intel_pt_queue *ptq;
1524
1525                 if (!pt->heap.heap_cnt)
1526                         return 0;
1527
1528                 if (pt->heap.heap_array[0].ordinal >= timestamp)
1529                         return 0;
1530
1531                 queue_nr = pt->heap.heap_array[0].queue_nr;
1532                 queue = &pt->queues.queue_array[queue_nr];
1533                 ptq = queue->priv;
1534
1535                 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1536                              queue_nr, pt->heap.heap_array[0].ordinal,
1537                              timestamp);
1538
1539                 auxtrace_heap__pop(&pt->heap);
1540
1541                 if (pt->heap.heap_cnt) {
1542                         ts = pt->heap.heap_array[0].ordinal + 1;
1543                         if (ts > timestamp)
1544                                 ts = timestamp;
1545                 } else {
1546                         ts = timestamp;
1547                 }
1548
1549                 intel_pt_set_pid_tid_cpu(pt, queue);
1550
1551                 ret = intel_pt_run_decoder(ptq, &ts);
1552
1553                 if (ret < 0) {
1554                         auxtrace_heap__add(&pt->heap, queue_nr, ts);
1555                         return ret;
1556                 }
1557
1558                 if (!ret) {
1559                         ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1560                         if (ret < 0)
1561                                 return ret;
1562                 } else {
1563                         ptq->on_heap = false;
1564                 }
1565         }
1566
1567         return 0;
1568 }
1569
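     /*
      * Without timestamps, decode the queues matching 'tid' in full
      * (tid == -1 means all queues).
      */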
1570 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1571                                             u64 time_)
1572 {
1573         struct auxtrace_queues *queues = &pt->queues;
1574         unsigned int i;
1575         u64 ts = 0;
1576
1577         for (i = 0; i < queues->nr_queues; i++) {
1578                 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1579                 struct intel_pt_queue *ptq = queue->priv;
1580
1581                 if (ptq && (tid == -1 || ptq->tid == tid)) {
1582                         ptq->time = time_;
1583                         intel_pt_set_pid_tid_cpu(pt, queue);
1584                         intel_pt_run_decoder(ptq, &ts);
1585                 }
1586         }
1587         return 0;
1588 }
1589
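     /* Synthesize an error event to report lost trace data */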
1590 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1591 {
1592         return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1593                                     sample->pid, sample->tid, 0);
1594 }
1595
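     /*
      * Find the decode queue for a CPU.  Queues are normally indexed by CPU
      * number, but fall back to searching when they are not.
      */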
1596 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1597 {
1598         unsigned i, j;
1599
1600         if (cpu < 0 || !pt->queues.nr_queues)
1601                 return NULL;
1602
1603         if ((unsigned)cpu >= pt->queues.nr_queues)
1604                 i = pt->queues.nr_queues - 1;
1605         else
1606                 i = cpu;
1607
1608         if (pt->queues.queue_array[i].cpu == cpu)
1609                 return pt->queues.queue_array[i].priv;
1610
1611         for (j = 0; i > 0; j++) {
1612                 if (pt->queues.queue_array[--i].cpu == cpu)
1613                         return pt->queues.queue_array[i].priv;
1614         }
1615
1616         for (; j < pt->queues.nr_queues; j++) {
1617                 if (pt->queues.queue_array[j].cpu == cpu)
1618                         return pt->queues.queue_array[j].priv;
1619         }
1620
1621         return NULL;
1622 }
1623
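     /*
      * Update the queue's switch state for a context switch on 'cpu'.
      * Returns 0 if switching tid is deferred until the switch IP is seen,
      * 1 if the caller should set the new tid immediately, or negative error.
      */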
1624 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
1625                                 u64 timestamp)
1626 {
1627         struct intel_pt_queue *ptq;
1628         int err;
1629
1630         if (!pt->sync_switch)
1631                 return 1;
1632
1633         ptq = intel_pt_cpu_to_ptq(pt, cpu);
1634         if (!ptq)
1635                 return 1;
1636
1637         switch (ptq->switch_state) {
1638         case INTEL_PT_SS_NOT_TRACING:
1639                 ptq->next_tid = -1;
1640                 break;
1641         case INTEL_PT_SS_UNKNOWN:
1642         case INTEL_PT_SS_TRACING:
1643                 ptq->next_tid = tid;
1644                 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1645                 return 0;
1646         case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1647                 if (!ptq->on_heap) {
1648                         ptq->timestamp = perf_time_to_tsc(timestamp,
1649                                                           &pt->tc);
1650                         err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1651                                                  ptq->timestamp);
1652                         if (err)
1653                                 return err;
1654                         ptq->on_heap = true;
1655                 }
1656                 ptq->switch_state = INTEL_PT_SS_TRACING;
1657                 break;
1658         case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1659                 ptq->next_tid = tid;
1660                 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1661                 break;
1662         default:
1663                 break;
1664         }
1665
1666         return 1;
1667 }
1668
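     /* Handle a sched:sched_switch tracepoint sample */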
1669 static int intel_pt_process_switch(struct intel_pt *pt,
1670                                    struct perf_sample *sample)
1671 {
1672         struct perf_evsel *evsel;
1673         pid_t tid;
1674         int cpu, ret;
1675
1676         evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1677         if (evsel != pt->switch_evsel)
1678                 return 0;
1679
1680         tid = perf_evsel__intval(evsel, sample, "next_pid");
1681         cpu = sample->cpu;
1682
1683         intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1684                      cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1685                      &pt->tc));
1686
1687         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1688         if (ret <= 0)
1689                 return ret;
1690
1691         return machine__set_current_tid(pt->machine, cpu, -1, tid);
1692 }
1693
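     /* Handle PERF_RECORD_SWITCH and PERF_RECORD_SWITCH_CPU_WIDE events */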
1694 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
1695                                    struct perf_sample *sample)
1696 {
1697         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1698         pid_t pid, tid;
1699         int cpu, ret;
1700
1701         cpu = sample->cpu;
1702
1703         if (pt->have_sched_switch == 3) {
1704                 if (!out)
1705                         return 0;
1706                 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
1707                         pr_err("Expecting CPU-wide context switch event\n");
1708                         return -EINVAL;
1709                 }
1710                 pid = event->context_switch.next_prev_pid;
1711                 tid = event->context_switch.next_prev_tid;
1712         } else {
1713                 if (out)
1714                         return 0;
1715                 pid = sample->pid;
1716                 tid = sample->tid;
1717         }
1718
1719         if (tid == -1) {
1720                 pr_err("context_switch event has no tid\n");
1721                 return -EINVAL;
1722         }
1723
1724         intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1725                      cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
1726                      &pt->tc));
1727
1728         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1729         if (ret <= 0)
1730                 return ret;
1731
1732         return machine__set_current_tid(pt->machine, cpu, pid, tid);
1733 }
1734
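     /*
      * Record which pid/tid is running on a CPU when tracing starts
      * (only needed for per-cpu mmaps).
      */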
1735 static int intel_pt_process_itrace_start(struct intel_pt *pt,
1736                                          union perf_event *event,
1737                                          struct perf_sample *sample)
1738 {
1739         if (!pt->per_cpu_mmaps)
1740                 return 0;
1741
1742         intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1743                      sample->cpu, event->itrace_start.pid,
1744                      event->itrace_start.tid, sample->time,
1745                      perf_time_to_tsc(sample->time, &pt->tc));
1746
1747         return machine__set_current_tid(pt->machine, sample->cpu,
1748                                         event->itrace_start.pid,
1749                                         event->itrace_start.tid);
1750 }
1751
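     /*
      * Per-event callback: advance decoding to the event's timestamp and
      * handle context switch, itrace start and truncated AUX data events.
      */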
1752 static int intel_pt_process_event(struct perf_session *session,
1753                                   union perf_event *event,
1754                                   struct perf_sample *sample,
1755                                   struct perf_tool *tool)
1756 {
1757         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1758                                            auxtrace);
1759         u64 timestamp;
1760         int err = 0;
1761
1762         if (dump_trace)
1763                 return 0;
1764
1765         if (!tool->ordered_events) {
1766                 pr_err("Intel Processor Trace requires ordered events\n");
1767                 return -EINVAL;
1768         }
1769
1770         if (sample->time && sample->time != (u64)-1)
1771                 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1772         else
1773                 timestamp = 0;
1774
1775         if (timestamp || pt->timeless_decoding) {
1776                 err = intel_pt_update_queues(pt);
1777                 if (err)
1778                         return err;
1779         }
1780
1781         if (pt->timeless_decoding) {
1782                 if (event->header.type == PERF_RECORD_EXIT) {
1783                         err = intel_pt_process_timeless_queues(pt,
1784                                                                event->fork.tid,
1785                                                                sample->time);
1786                 }
1787         } else if (timestamp) {
1788                 err = intel_pt_process_queues(pt, timestamp);
1789         }
1790         if (err)
1791                 return err;
1792
1793         if (event->header.type == PERF_RECORD_AUX &&
1794             (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1795             pt->synth_opts.errors) {
1796                 err = intel_pt_lost(pt, sample);
1797                 if (err)
1798                         return err;
1799         }
1800
1801         if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1802                 err = intel_pt_process_switch(pt, sample);
1803         else if (event->header.type == PERF_RECORD_ITRACE_START)
1804                 err = intel_pt_process_itrace_start(pt, event, sample);
1805         else if (event->header.type == PERF_RECORD_SWITCH ||
1806                  event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
1807                 err = intel_pt_context_switch(pt, event, sample);
1808
1809         intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1810                      perf_event__name(event->header.type), event->header.type,
1811                      sample->cpu, sample->time, timestamp);
1812
1813         return err;
1814 }
1815
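     /* Flush callback: decode any remaining queued trace data */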
1816 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1817 {
1818         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1819                                            auxtrace);
1820         int ret;
1821
1822         if (dump_trace)
1823                 return 0;
1824
1825         if (!tool->ordered_events)
1826                 return -EINVAL;
1827
1828         ret = intel_pt_update_queues(pt);
1829         if (ret < 0)
1830                 return ret;
1831
1832         if (pt->timeless_decoding)
1833                 return intel_pt_process_timeless_queues(pt, -1,
1834                                                         MAX_TIMESTAMP - 1);
1835
1836         return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1837 }
1838
1839 static void intel_pt_free_events(struct perf_session *session)
1840 {
1841         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1842                                            auxtrace);
1843         struct auxtrace_queues *queues = &pt->queues;
1844         unsigned int i;
1845
1846         for (i = 0; i < queues->nr_queues; i++) {
1847                 intel_pt_free_queue(queues->queue_array[i].priv);
1848                 queues->queue_array[i].priv = NULL;
1849         }
1850         intel_pt_log_disable();
1851         auxtrace_queues__free(queues);
1852 }
1853
1854 static void intel_pt_free(struct perf_session *session)
1855 {
1856         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1857                                            auxtrace);
1858
1859         auxtrace_heap__free(&pt->heap);
1860         intel_pt_free_events(session);
1861         session->auxtrace = NULL;
1862         thread__put(pt->unknown_thread);
1863         addr_filters__exit(&pt->filts);
1864         zfree(&pt->filter);
1865         free(pt);
1866 }
1867
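     /*
      * Queue AUX trace data as it is encountered.  Nothing to do if the data
      * was already queued from the file's auxtrace index.
      */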
1868 static int intel_pt_process_auxtrace_event(struct perf_session *session,
1869                                            union perf_event *event,
1870                                            struct perf_tool *tool __maybe_unused)
1871 {
1872         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1873                                            auxtrace);
1874
1875         if (pt->sampling_mode)
1876                 return 0;
1877
1878         if (!pt->data_queued) {
1879                 struct auxtrace_buffer *buffer;
1880                 off_t data_offset;
1881                 int fd = perf_data_file__fd(session->file);
1882                 int err;
1883
1884                 if (perf_data_file__is_pipe(session->file)) {
1885                         data_offset = 0;
1886                 } else {
1887                         data_offset = lseek(fd, 0, SEEK_CUR);
1888                         if (data_offset == -1)
1889                                 return -errno;
1890                 }
1891
1892                 err = auxtrace_queues__add_event(&pt->queues, session, event,
1893                                                  data_offset, &buffer);
1894                 if (err)
1895                         return err;
1896
1897                 /* Dump here now that we have copied a piped trace out of the pipe */
1898                 if (dump_trace) {
1899                         if (auxtrace_buffer__get_data(buffer, fd)) {
1900                                 intel_pt_dump_event(pt, buffer->data,
1901                                                     buffer->size);
1902                                 auxtrace_buffer__put_data(buffer);
1903                         }
1904                 }
1905         }
1906
1907         return 0;
1908 }
1909
1910 struct intel_pt_synth {
1911         struct perf_tool dummy_tool;
1912         struct perf_session *session;
1913 };
1914
1915 static int intel_pt_event_synth(struct perf_tool *tool,
1916                                 union perf_event *event,
1917                                 struct perf_sample *sample __maybe_unused,
1918                                 struct machine *machine __maybe_unused)
1919 {
1920         struct intel_pt_synth *intel_pt_synth =
1921                         container_of(tool, struct intel_pt_synth, dummy_tool);
1922
1923         return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1924                                                  NULL);
1925 }
1926
1927 static int intel_pt_synth_event(struct perf_session *session,
1928                                 struct perf_event_attr *attr, u64 id)
1929 {
1930         struct intel_pt_synth intel_pt_synth;
1931
1932         memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1933         intel_pt_synth.session = session;
1934
1935         return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1936                                            &id, intel_pt_event_synth);
1937 }
1938
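     /*
      * Set up the synthesized "instructions", "transactions" and "branches"
      * events according to the itrace options, basing their attributes on an
      * existing Intel PT evsel.
      */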
1939 static int intel_pt_synth_events(struct intel_pt *pt,
1940                                  struct perf_session *session)
1941 {
1942         struct perf_evlist *evlist = session->evlist;
1943         struct perf_evsel *evsel;
1944         struct perf_event_attr attr;
1945         bool found = false;
1946         u64 id;
1947         int err;
1948
1949         evlist__for_each_entry(evlist, evsel) {
1950                 if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1951                         found = true;
1952                         break;
1953                 }
1954         }
1955
1956         if (!found) {
1957                 pr_debug("There are no selected events with Intel Processor Trace data\n");
1958                 return 0;
1959         }
1960
1961         memset(&attr, 0, sizeof(struct perf_event_attr));
1962         attr.size = sizeof(struct perf_event_attr);
1963         attr.type = PERF_TYPE_HARDWARE;
1964         attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1965         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1966                             PERF_SAMPLE_PERIOD;
1967         if (pt->timeless_decoding)
1968                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1969         else
1970                 attr.sample_type |= PERF_SAMPLE_TIME;
1971         if (!pt->per_cpu_mmaps)
1972                 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1973         attr.exclude_user = evsel->attr.exclude_user;
1974         attr.exclude_kernel = evsel->attr.exclude_kernel;
1975         attr.exclude_hv = evsel->attr.exclude_hv;
1976         attr.exclude_host = evsel->attr.exclude_host;
1977         attr.exclude_guest = evsel->attr.exclude_guest;
1978         attr.sample_id_all = evsel->attr.sample_id_all;
1979         attr.read_format = evsel->attr.read_format;
1980
1981         id = evsel->id[0] + 1000000000;
1982         if (!id)
1983                 id = 1;
1984
1985         if (pt->synth_opts.instructions) {
1986                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1987                 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1988                         attr.sample_period =
1989                                 intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1990                 else
1991                         attr.sample_period = pt->synth_opts.period;
1992                 pt->instructions_sample_period = attr.sample_period;
1993                 if (pt->synth_opts.callchain)
1994                         attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1995                 if (pt->synth_opts.last_branch)
1996                         attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1997                 pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1998                          id, (u64)attr.sample_type);
1999                 err = intel_pt_synth_event(session, &attr, id);
2000                 if (err) {
2001                         pr_err("%s: failed to synthesize 'instructions' event type\n",
2002                                __func__);
2003                         return err;
2004                 }
2005                 pt->sample_instructions = true;
2006                 pt->instructions_sample_type = attr.sample_type;
2007                 pt->instructions_id = id;
2008                 id += 1;
2009         }
2010
2011         if (pt->synth_opts.transactions) {
2012                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2013                 attr.sample_period = 1;
2014                 if (pt->synth_opts.callchain)
2015                         attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2016                 if (pt->synth_opts.last_branch)
2017                         attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2018                 pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2019                          id, (u64)attr.sample_type);
2020                 err = intel_pt_synth_event(session, &attr, id);
2021                 if (err) {
2022                         pr_err("%s: failed to synthesize 'transactions' event type\n",
2023                                __func__);
2024                         return err;
2025                 }
2026                 pt->sample_transactions = true;
2027                 pt->transactions_id = id;
2028                 id += 1;
2029                 evlist__for_each_entry(evlist, evsel) {
2030                         if (evsel->id && evsel->id[0] == pt->transactions_id) {
2031                                 if (evsel->name)
2032                                         zfree(&evsel->name);
2033                                 evsel->name = strdup("transactions");
2034                                 break;
2035                         }
2036                 }
2037         }
2038
2039         if (pt->synth_opts.branches) {
2040                 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
2041                 attr.sample_period = 1;
2042                 attr.sample_type |= PERF_SAMPLE_ADDR;
2043                 attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
2044                 attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
2045                 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2046                          id, (u64)attr.sample_type);
2047                 err = intel_pt_synth_event(session, &attr, id);
2048                 if (err) {
2049                         pr_err("%s: failed to synthesize 'branches' event type\n",
2050                                __func__);
2051                         return err;
2052                 }
2053                 pt->sample_branches = true;
2054                 pt->branches_sample_type = attr.sample_type;
2055                 pt->branches_id = id;
2056         }
2057
2058         pt->synth_needs_swap = evsel->needs_swap;
2059
2060         return 0;
2061 }
2062
2063 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
2064 {
2065         struct perf_evsel *evsel;
2066
2067         evlist__for_each_entry_reverse(evlist, evsel) {
2068                 const char *name = perf_evsel__name(evsel);
2069
2070                 if (!strcmp(name, "sched:sched_switch"))
2071                         return evsel;
2072         }
2073
2074         return NULL;
2075 }
2076
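     /* Return true if any evsel has the context_switch attribute set */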
2077 static bool intel_pt_find_switch(struct perf_evlist *evlist)
2078 {
2079         struct perf_evsel *evsel;
2080
2081         evlist__for_each_entry(evlist, evsel) {
2082                 if (evsel->attr.context_switch)
2083                         return true;
2084         }
2085
2086         return false;
2087 }
2088
2089 static int intel_pt_perf_config(const char *var, const char *value, void *data)
2090 {
2091         struct intel_pt *pt = data;
2092
2093         if (!strcmp(var, "intel-pt.mispred-all"))
2094                 pt->mispred_all = perf_config_bool(var, value);
2095
2096         return 0;
2097 }
2098
2099 static const char * const intel_pt_info_fmts[] = {
2100         [INTEL_PT_PMU_TYPE]             = "  PMU Type            %"PRId64"\n",
2101         [INTEL_PT_TIME_SHIFT]           = "  Time Shift          %"PRIu64"\n",
2102         [INTEL_PT_TIME_MULT]            = "  Time Multiplier     %"PRIu64"\n",
2103         [INTEL_PT_TIME_ZERO]            = "  Time Zero           %"PRIu64"\n",
2104         [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero       %"PRId64"\n",
2105         [INTEL_PT_TSC_BIT]              = "  TSC bit             %#"PRIx64"\n",
2106         [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit       %#"PRIx64"\n",
2107         [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch   %"PRId64"\n",
2108         [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode       %"PRId64"\n",
2109         [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps        %"PRId64"\n",
2110         [INTEL_PT_MTC_BIT]              = "  MTC bit             %#"PRIx64"\n",
2111         [INTEL_PT_TSC_CTC_N]            = "  TSC:CTC numerator   %"PRIu64"\n",
2112         [INTEL_PT_TSC_CTC_D]            = "  TSC:CTC denominator %"PRIu64"\n",
2113         [INTEL_PT_CYC_BIT]              = "  CYC bit             %#"PRIx64"\n",
2114         [INTEL_PT_MAX_NONTURBO_RATIO]   = "  Max non-turbo ratio %"PRIu64"\n",
2115         [INTEL_PT_FILTER_STR_LEN]       = "  Filter string len.  %"PRIu64"\n",
2116 };
2117
2118 static void intel_pt_print_info(u64 *arr, int start, int finish)
2119 {
2120         int i;
2121
2122         if (!dump_trace)
2123                 return;
2124
2125         for (i = start; i <= finish; i++)
2126                 fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2127 }
2128
2129 static void intel_pt_print_info_str(const char *name, const char *str)
2130 {
2131         if (!dump_trace)
2132                 return;
2133
2134         fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
2135 }
2136
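     /* Return true if the auxtrace_info event is big enough to hold priv[pos] */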
2137 static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
2138 {
2139         return auxtrace_info->header.size >=
2140                 sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
2141 }
2142
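     /*
      * Set up Intel PT processing for the session from the auxtrace_info
      * event: read the recorded parameters, create the decode queues and
      * register the auxtrace callbacks.
      */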
2143 int intel_pt_process_auxtrace_info(union perf_event *event,
2144                                    struct perf_session *session)
2145 {
2146         struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2147         size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2148         struct intel_pt *pt;
2149         void *info_end;
2150         u64 *info;
2151         int err;
2152
2153         if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2154                                         min_sz)
2155                 return -EINVAL;
2156
2157         pt = zalloc(sizeof(struct intel_pt));
2158         if (!pt)
2159                 return -ENOMEM;
2160
2161         addr_filters__init(&pt->filts);
2162
2163         err = perf_config(intel_pt_perf_config, pt);
2164         if (err)
2165                 goto err_free;
2166
2167         err = auxtrace_queues__init(&pt->queues);
2168         if (err)
2169                 goto err_free;
2170
2171         intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2172
2173         pt->session = session;
2174         pt->machine = &session->machines.host; /* No kvm support */
2175         pt->auxtrace_type = auxtrace_info->type;
2176         pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2177         pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2178         pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2179         pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2180         pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2181         pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2182         pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2183         pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2184         pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2185         pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2186         intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2187                             INTEL_PT_PER_CPU_MMAPS);
2188
2189         if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
2190                 pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2191                 pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2192                 pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2193                 pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2194                 pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2195                 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2196                                     INTEL_PT_CYC_BIT);
2197         }
2198
2199         if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
2200                 pt->max_non_turbo_ratio =
2201                         auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
2202                 intel_pt_print_info(&auxtrace_info->priv[0],
2203                                     INTEL_PT_MAX_NONTURBO_RATIO,
2204                                     INTEL_PT_MAX_NONTURBO_RATIO);
2205         }
2206
2207         info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
2208         info_end = (void *)info + auxtrace_info->header.size;
2209
2210         if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
2211                 size_t len;
2212
2213                 len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
2214                 intel_pt_print_info(&auxtrace_info->priv[0],
2215                                     INTEL_PT_FILTER_STR_LEN,
2216                                     INTEL_PT_FILTER_STR_LEN);
2217                 if (len) {
2218                         const char *filter = (const char *)info;
2219
2220                         len = roundup(len + 1, 8);
2221                         info += len >> 3;
2222                         if ((void *)info > info_end) {
2223                                 pr_err("%s: bad filter string length\n", __func__);
2224                                 err = -EINVAL;
2225                                 goto err_free_queues;
2226                         }
2227                         pt->filter = memdup(filter, len);
2228                         if (!pt->filter) {
2229                                 err = -ENOMEM;
2230                                 goto err_free_queues;
2231                         }
2232                         if (session->header.needs_swap)
2233                                 mem_bswap_64(pt->filter, len);
2234                         if (pt->filter[len - 1]) {
2235                                 pr_err("%s: filter string not null terminated\n", __func__);
2236                                 err = -EINVAL;
2237                                 goto err_free_queues;
2238                         }
2239                         err = addr_filters__parse_bare_filter(&pt->filts,
2240                                                               filter);
2241                         if (err)
2242                                 goto err_free_queues;
2243                 }
2244                 intel_pt_print_info_str("Filter string", pt->filter);
2245         }
2246
2247         pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2248         pt->have_tsc = intel_pt_have_tsc(pt);
2249         pt->sampling_mode = false;
2250         pt->est_tsc = !pt->timeless_decoding;
2251
2252         pt->unknown_thread = thread__new(999999999, 999999999);
2253         if (!pt->unknown_thread) {
2254                 err = -ENOMEM;
2255                 goto err_free_queues;
2256         }
2257
2258         /*
2259          * Since this thread will not be kept in any rbtree nor in a
2260          * list, initialize its list node so that at thread__put() the
2261          * current thread lifetime assumption is kept and we don't segfault
2262          * at list_del_init().
2263          */
2264         INIT_LIST_HEAD(&pt->unknown_thread->node);
2265
2266         err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2267         if (err)
2268                 goto err_delete_thread;
2269         if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2270                 err = -ENOMEM;
2271                 goto err_delete_thread;
2272         }
2273
2274         pt->auxtrace.process_event = intel_pt_process_event;
2275         pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2276         pt->auxtrace.flush_events = intel_pt_flush;
2277         pt->auxtrace.free_events = intel_pt_free_events;
2278         pt->auxtrace.free = intel_pt_free;
2279         session->auxtrace = &pt->auxtrace;
2280
2281         if (dump_trace)
2282                 return 0;
2283
2284         if (pt->have_sched_switch == 1) {
2285                 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2286                 if (!pt->switch_evsel) {
2287                         pr_err("%s: missing sched_switch event\n", __func__);
2288                         err = -EINVAL;
2289                         goto err_delete_thread;
2290                 }
2291         } else if (pt->have_sched_switch == 2 &&
2292                    !intel_pt_find_switch(session->evlist)) {
2293                 pr_err("%s: missing context_switch attribute flag\n", __func__);
2294                 err = -EINVAL;
2295                 goto err_delete_thread;
2296         }
2297
2298         if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2299                 pt->synth_opts = *session->itrace_synth_opts;
2300         } else {
2301                 itrace_synth_opts__set_default(&pt->synth_opts);
2302                 if (use_browser != -1) {
2303                         pt->synth_opts.branches = false;
2304                         pt->synth_opts.callchain = true;
2305                 }
2306                 if (session->itrace_synth_opts)
2307                         pt->synth_opts.thread_stack =
2308                                 session->itrace_synth_opts->thread_stack;
2309         }
2310
2311         if (pt->synth_opts.log)
2312                 intel_pt_log_enable();
2313
2314         /* Maximum non-turbo ratio is TSC freq / 100 MHz */
2315         if (pt->tc.time_mult) {
2316                 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2317
2318                 if (!pt->max_non_turbo_ratio)
2319                         pt->max_non_turbo_ratio =
2320                                         (tsc_freq + 50000000) / 100000000;
2321                 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2322                 intel_pt_log("Maximum non-turbo ratio %u\n",
2323                              pt->max_non_turbo_ratio);
2324         }
2325
2326         if (pt->synth_opts.calls)
2327                 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2328                                        PERF_IP_FLAG_TRACE_END;
2329         if (pt->synth_opts.returns)
2330                 pt->branches_filter |= PERF_IP_FLAG_RETURN |
2331                                        PERF_IP_FLAG_TRACE_BEGIN;
2332
2333         if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2334                 symbol_conf.use_callchain = true;
2335                 if (callchain_register_param(&callchain_param) < 0) {
2336                         symbol_conf.use_callchain = false;
2337                         pt->synth_opts.callchain = false;
2338                 }
2339         }
2340
2341         err = intel_pt_synth_events(pt, session);
2342         if (err)
2343                 goto err_delete_thread;
2344
2345         err = auxtrace_queues__process_index(&pt->queues, session);
2346         if (err)
2347                 goto err_delete_thread;
2348
2349         if (pt->queues.populated)
2350                 pt->data_queued = true;
2351
2352         if (pt->timeless_decoding)
2353                 pr_debug2("Intel PT decoding without timestamps\n");
2354
2355         return 0;
2356
2357 err_delete_thread:
2358         thread__zput(pt->unknown_thread);
2359 err_free_queues:
2360         intel_pt_log_disable();
2361         auxtrace_queues__free(&pt->queues);
2362         session->auxtrace = NULL;
2363 err_free:
2364         addr_filters__exit(&pt->filts);
2365         zfree(&pt->filter);
2366         free(pt);
2367         return err;
2368 }