tools/perf/util/evlist.c (blob f74ea2e55fde893ef579733108fc7801f820d504)
1 /*
2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3  *
4  * Parts came from builtin-{top,stat,record}.c, see those files for further
5  * copyright notes.
6  *
7  * Released under the GPL v2. (and only v2, not any later version)
8  */
9 #include "util.h"
10 #include <api/fs/fs.h>
11 #include <errno.h>
12 #include <inttypes.h>
13 #include <poll.h>
14 #include "cpumap.h"
15 #include "thread_map.h"
16 #include "target.h"
17 #include "evlist.h"
18 #include "evsel.h"
19 #include "debug.h"
20 #include "asm/bug.h"
21 #include <unistd.h>
22
23 #include "parse-events.h"
24 #include <subcmd/parse-options.h>
25
26 #include <sys/mman.h>
27
28 #include <linux/bitops.h>
29 #include <linux/hash.h>
30 #include <linux/log2.h>
31 #include <linux/err.h>
32
33 static void perf_mmap__munmap(struct perf_mmap *map);
34 static void perf_mmap__put(struct perf_mmap *map);
35
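/*
 * FD() and SID() index the per-evsel xyarrays holding, respectively, the
 * perf_event_open() file descriptor and the struct perf_sample_id for a
 * given (cpu index, thread index) pair.
 */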
36 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
37 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
38
39 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
40                        struct thread_map *threads)
41 {
42         int i;
43
44         for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
45                 INIT_HLIST_HEAD(&evlist->heads[i]);
46         INIT_LIST_HEAD(&evlist->entries);
47         perf_evlist__set_maps(evlist, cpus, threads);
48         fdarray__init(&evlist->pollfd, 64);
49         evlist->workload.pid = -1;
50         evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
51 }
52
53 struct perf_evlist *perf_evlist__new(void)
54 {
55         struct perf_evlist *evlist = zalloc(sizeof(*evlist));
56
57         if (evlist != NULL)
58                 perf_evlist__init(evlist, NULL, NULL);
59
60         return evlist;
61 }
62
63 struct perf_evlist *perf_evlist__new_default(void)
64 {
65         struct perf_evlist *evlist = perf_evlist__new();
66
67         if (evlist && perf_evlist__add_default(evlist)) {
68                 perf_evlist__delete(evlist);
69                 evlist = NULL;
70         }
71
72         return evlist;
73 }
74
75 struct perf_evlist *perf_evlist__new_dummy(void)
76 {
77         struct perf_evlist *evlist = perf_evlist__new();
78
79         if (evlist && perf_evlist__add_dummy(evlist)) {
80                 perf_evlist__delete(evlist);
81                 evlist = NULL;
82         }
83
84         return evlist;
85 }
86
87 /**
88  * perf_evlist__set_id_pos - set the positions of event ids.
89  * @evlist: selected event list
90  *
91  * Events with compatible sample types all have the same id_pos
92  * and is_pos.  For convenience, put a copy on evlist.
93  */
94 void perf_evlist__set_id_pos(struct perf_evlist *evlist)
95 {
96         struct perf_evsel *first = perf_evlist__first(evlist);
97
98         evlist->id_pos = first->id_pos;
99         evlist->is_pos = first->is_pos;
100 }
101
102 static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
103 {
104         struct perf_evsel *evsel;
105
106         evlist__for_each_entry(evlist, evsel)
107                 perf_evsel__calc_id_pos(evsel);
108
109         perf_evlist__set_id_pos(evlist);
110 }
111
112 static void perf_evlist__purge(struct perf_evlist *evlist)
113 {
114         struct perf_evsel *pos, *n;
115
116         evlist__for_each_entry_safe(evlist, n, pos) {
117                 list_del_init(&pos->node);
118                 pos->evlist = NULL;
119                 perf_evsel__delete(pos);
120         }
121
122         evlist->nr_entries = 0;
123 }
124
125 void perf_evlist__exit(struct perf_evlist *evlist)
126 {
127         zfree(&evlist->mmap);
128         zfree(&evlist->backward_mmap);
129         fdarray__exit(&evlist->pollfd);
130 }
131
132 void perf_evlist__delete(struct perf_evlist *evlist)
133 {
134         if (evlist == NULL)
135                 return;
136
137         perf_evlist__munmap(evlist);
138         perf_evlist__close(evlist);
139         cpu_map__put(evlist->cpus);
140         thread_map__put(evlist->threads);
141         evlist->cpus = NULL;
142         evlist->threads = NULL;
143         perf_evlist__purge(evlist);
144         perf_evlist__exit(evlist);
145         free(evlist);
146 }
147
148 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
149                                           struct perf_evsel *evsel)
150 {
151         /*
152          * We already have cpus for evsel (via PMU sysfs), so
153          * keep them, unless a target cpu list was defined.
154          */
155         if (!evsel->own_cpus || evlist->has_user_cpus) {
156                 cpu_map__put(evsel->cpus);
157                 evsel->cpus = cpu_map__get(evlist->cpus);
158         } else if (evsel->cpus != evsel->own_cpus) {
159                 cpu_map__put(evsel->cpus);
160                 evsel->cpus = cpu_map__get(evsel->own_cpus);
161         }
162
163         thread_map__put(evsel->threads);
164         evsel->threads = thread_map__get(evlist->threads);
165 }
166
167 static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
168 {
169         struct perf_evsel *evsel;
170
171         evlist__for_each_entry(evlist, evsel)
172                 __perf_evlist__propagate_maps(evlist, evsel);
173 }
174
175 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
176 {
177         entry->evlist = evlist;
178         list_add_tail(&entry->node, &evlist->entries);
179         entry->idx = evlist->nr_entries;
180         entry->tracking = !entry->idx;
181
182         if (!evlist->nr_entries++)
183                 perf_evlist__set_id_pos(evlist);
184
185         __perf_evlist__propagate_maps(evlist, entry);
186 }
187
188 void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
189 {
190         evsel->evlist = NULL;
191         list_del_init(&evsel->node);
192         evlist->nr_entries -= 1;
193 }
194
195 void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
196                                    struct list_head *list)
197 {
198         struct perf_evsel *evsel, *temp;
199
200         __evlist__for_each_entry_safe(list, temp, evsel) {
201                 list_del_init(&evsel->node);
202                 perf_evlist__add(evlist, evsel);
203         }
204 }
205
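/*
 * The list is assumed to hold one contiguous group of events: the first
 * entry becomes the group leader and nr_members is derived from the idx
 * span between the first and the last entry.
 */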
206 void __perf_evlist__set_leader(struct list_head *list)
207 {
208         struct perf_evsel *evsel, *leader;
209
210         leader = list_entry(list->next, struct perf_evsel, node);
211         evsel = list_entry(list->prev, struct perf_evsel, node);
212
213         leader->nr_members = evsel->idx - leader->idx + 1;
214
215         __evlist__for_each_entry(list, evsel) {
216                 evsel->leader = leader;
217         }
218 }
219
220 void perf_evlist__set_leader(struct perf_evlist *evlist)
221 {
222         if (evlist->nr_entries) {
223                 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
224                 __perf_evlist__set_leader(&evlist->entries);
225         }
226 }
227
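/*
 * Probe the highest precise_ip level the kernel/PMU accepts: start at 3
 * (the maximum, cf. "cycles:ppp") and keep decrementing until a trial
 * sys_perf_event_open() on the current task succeeds.
 */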
228 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
229 {
230         attr->precise_ip = 3;
231
232         while (attr->precise_ip != 0) {
233                 int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
234                 if (fd != -1) {
235                         close(fd);
236                         break;
237                 }
238                 --attr->precise_ip;
239         }
240 }
241
242 int perf_evlist__add_default(struct perf_evlist *evlist)
243 {
244         struct perf_evsel *evsel = perf_evsel__new_cycles();
245
246         if (evsel == NULL)
247                 return -ENOMEM;
248
249         perf_evlist__add(evlist, evsel);
250         return 0;
251 }
252
253 int perf_evlist__add_dummy(struct perf_evlist *evlist)
254 {
255         struct perf_event_attr attr = {
256                 .type   = PERF_TYPE_SOFTWARE,
257                 .config = PERF_COUNT_SW_DUMMY,
258                 .size   = sizeof(attr), /* to capture ABI version */
259         };
260         struct perf_evsel *evsel = perf_evsel__new(&attr);
261
262         if (evsel == NULL)
263                 return -ENOMEM;
264
265         perf_evlist__add(evlist, evsel);
266         return 0;
267 }
268
269 static int perf_evlist__add_attrs(struct perf_evlist *evlist,
270                                   struct perf_event_attr *attrs, size_t nr_attrs)
271 {
272         struct perf_evsel *evsel, *n;
273         LIST_HEAD(head);
274         size_t i;
275
276         for (i = 0; i < nr_attrs; i++) {
277                 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
278                 if (evsel == NULL)
279                         goto out_delete_partial_list;
280                 list_add_tail(&evsel->node, &head);
281         }
282
283         perf_evlist__splice_list_tail(evlist, &head);
284
285         return 0;
286
287 out_delete_partial_list:
288         __evlist__for_each_entry_safe(&head, n, evsel)
289                 perf_evsel__delete(evsel);
290         return -1;
291 }
292
293 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
294                                      struct perf_event_attr *attrs, size_t nr_attrs)
295 {
296         size_t i;
297
298         for (i = 0; i < nr_attrs; i++)
299                 event_attr_init(attrs + i);
300
301         return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
302 }
303
304 struct perf_evsel *
305 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
306 {
307         struct perf_evsel *evsel;
308
309         evlist__for_each_entry(evlist, evsel) {
310                 if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
311                     (int)evsel->attr.config == id)
312                         return evsel;
313         }
314
315         return NULL;
316 }
317
318 struct perf_evsel *
319 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
320                                      const char *name)
321 {
322         struct perf_evsel *evsel;
323
324         evlist__for_each_entry(evlist, evsel) {
325                 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
326                     (strcmp(evsel->name, name) == 0))
327                         return evsel;
328         }
329
330         return NULL;
331 }
332
333 int perf_evlist__add_newtp(struct perf_evlist *evlist,
334                            const char *sys, const char *name, void *handler)
335 {
336         struct perf_evsel *evsel = perf_evsel__newtp(sys, name);
337
338         if (IS_ERR(evsel))
339                 return -1;
340
341         evsel->handler = handler;
342         perf_evlist__add(evlist, evsel);
343         return 0;
344 }
345
346 static int perf_evlist__nr_threads(struct perf_evlist *evlist,
347                                    struct perf_evsel *evsel)
348 {
349         if (evsel->system_wide)
350                 return 1;
351         else
352                 return thread_map__nr(evlist->threads);
353 }
354
355 void perf_evlist__disable(struct perf_evlist *evlist)
356 {
357         struct perf_evsel *pos;
358
359         evlist__for_each_entry(evlist, pos) {
360                 if (!perf_evsel__is_group_leader(pos) || !pos->fd)
361                         continue;
362                 perf_evsel__disable(pos);
363         }
364
365         evlist->enabled = false;
366 }
367
368 void perf_evlist__enable(struct perf_evlist *evlist)
369 {
370         struct perf_evsel *pos;
371
372         evlist__for_each_entry(evlist, pos) {
373                 if (!perf_evsel__is_group_leader(pos) || !pos->fd)
374                         continue;
375                 perf_evsel__enable(pos);
376         }
377
378         evlist->enabled = true;
379 }
380
381 void perf_evlist__toggle_enable(struct perf_evlist *evlist)
382 {
383         (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
384 }
385
386 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
387                                          struct perf_evsel *evsel, int cpu)
388 {
389         int thread;
390         int nr_threads = perf_evlist__nr_threads(evlist, evsel);
391
392         if (!evsel->fd)
393                 return -EINVAL;
394
395         for (thread = 0; thread < nr_threads; thread++) {
396                 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
397                 if (err)
398                         return err;
399         }
400         return 0;
401 }
402
403 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
404                                             struct perf_evsel *evsel,
405                                             int thread)
406 {
407         int cpu;
408         int nr_cpus = cpu_map__nr(evlist->cpus);
409
410         if (!evsel->fd)
411                 return -EINVAL;
412
413         for (cpu = 0; cpu < nr_cpus; cpu++) {
414                 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
415                 if (err)
416                         return err;
417         }
418         return 0;
419 }
420
421 int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
422                                   struct perf_evsel *evsel, int idx)
423 {
424         bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);
425
426         if (per_cpu_mmaps)
427                 return perf_evlist__enable_event_cpu(evlist, evsel, idx);
428         else
429                 return perf_evlist__enable_event_thread(evlist, evsel, idx);
430 }
431
432 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
433 {
434         int nr_cpus = cpu_map__nr(evlist->cpus);
435         int nr_threads = thread_map__nr(evlist->threads);
436         int nfds = 0;
437         struct perf_evsel *evsel;
438
439         evlist__for_each_entry(evlist, evsel) {
440                 if (evsel->system_wide)
441                         nfds += nr_cpus;
442                 else
443                         nfds += nr_cpus * nr_threads;
444         }
445
446         if (fdarray__available_entries(&evlist->pollfd) < nfds &&
447             fdarray__grow(&evlist->pollfd, nfds) < 0)
448                 return -ENOMEM;
449
450         return 0;
451 }
452
453 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
454                                      struct perf_mmap *map, short revent)
455 {
456         int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
457         /*
458          * Save the map so that when we filter out POLLHUP'ed fds we can
459          * put the associated evlist->mmap[] entry (perf_mmap__put()).
460          */
461         if (pos >= 0) {
462                 evlist->pollfd.priv[pos].ptr = map;
463
464                 fcntl(fd, F_SETFL, O_NONBLOCK);
465         }
466
467         return pos;
468 }
469
470 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
471 {
472         return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
473 }
474
475 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
476                                          void *arg __maybe_unused)
477 {
478         struct perf_mmap *map = fda->priv[fd].ptr;
479
480         if (map)
481                 perf_mmap__put(map);
482 }
483
484 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
485 {
486         return fdarray__filter(&evlist->pollfd, revents_and_mask,
487                                perf_evlist__munmap_filtered, NULL);
488 }
489
490 int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
491 {
492         return fdarray__poll(&evlist->pollfd, timeout);
493 }
494
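/*
 * Sample IDs are hashed into evlist->heads[], so that perf_evlist__id2sid()
 * and perf_evlist__id2evsel() can map the ID found in an event back to the
 * evsel (and cpu/tid) it was opened for.
 */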
495 static void perf_evlist__id_hash(struct perf_evlist *evlist,
496                                  struct perf_evsel *evsel,
497                                  int cpu, int thread, u64 id)
498 {
499         int hash;
500         struct perf_sample_id *sid = SID(evsel, cpu, thread);
501
502         sid->id = id;
503         sid->evsel = evsel;
504         hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
505         hlist_add_head(&sid->node, &evlist->heads[hash]);
506 }
507
508 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
509                          int cpu, int thread, u64 id)
510 {
511         perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
512         evsel->id[evsel->ids++] = id;
513 }
514
515 int perf_evlist__id_add_fd(struct perf_evlist *evlist,
516                            struct perf_evsel *evsel,
517                            int cpu, int thread, int fd)
518 {
519         u64 read_data[4] = { 0, };
520         int id_idx = 1; /* The first entry is the counter value */
521         u64 id;
522         int ret;
523
524         ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
525         if (!ret)
526                 goto add;
527
528         if (errno != ENOTTY)
529                 return -1;
530
531         /* Legacy way to get the event id. All hail to old kernels! */
532
533         /*
534          * This way does not work with group format read, so bail
535          * out in that case.
536          */
537         if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
538                 return -1;
539
540         if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
541             read(fd, &read_data, sizeof(read_data)) == -1)
542                 return -1;
543
544         if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
545                 ++id_idx;
546         if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
547                 ++id_idx;
548
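        /*
         * With the legacy read() method the buffer layout is:
         *   value [TIME_ENABLED] [TIME_RUNNING] id
         * so after the optional fields id_idx points at the trailing id.
         */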
549         id = read_data[id_idx];
550
551  add:
552         perf_evlist__id_add(evlist, evsel, cpu, thread, id);
553         return 0;
554 }
555
556 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
557                                      struct perf_evsel *evsel, int idx, int cpu,
558                                      int thread)
559 {
560         struct perf_sample_id *sid = SID(evsel, cpu, thread);
561         sid->idx = idx;
562         if (evlist->cpus && cpu >= 0)
563                 sid->cpu = evlist->cpus->map[cpu];
564         else
565                 sid->cpu = -1;
566         if (!evsel->system_wide && evlist->threads && thread >= 0)
567                 sid->tid = thread_map__pid(evlist->threads, thread);
568         else
569                 sid->tid = -1;
570 }
571
572 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
573 {
574         struct hlist_head *head;
575         struct perf_sample_id *sid;
576         int hash;
577
578         hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
579         head = &evlist->heads[hash];
580
581         hlist_for_each_entry(sid, head, node)
582                 if (sid->id == id)
583                         return sid;
584
585         return NULL;
586 }
587
588 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
589 {
590         struct perf_sample_id *sid;
591
592         if (evlist->nr_entries == 1 || !id)
593                 return perf_evlist__first(evlist);
594
595         sid = perf_evlist__id2sid(evlist, id);
596         if (sid)
597                 return sid->evsel;
598
599         if (!perf_evlist__sample_id_all(evlist))
600                 return perf_evlist__first(evlist);
601
602         return NULL;
603 }
604
605 struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
606                                                 u64 id)
607 {
608         struct perf_sample_id *sid;
609
610         if (!id)
611                 return NULL;
612
613         sid = perf_evlist__id2sid(evlist, id);
614         if (sid)
615                 return sid->evsel;
616
617         return NULL;
618 }
619
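/*
 * Extract the event ID from a raw event: for PERF_RECORD_SAMPLE the ID sits
 * id_pos u64 words from the start of the sample array, while for other
 * record types (with sample_id_all) it sits is_pos words from the end.
 */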
620 static int perf_evlist__event2id(struct perf_evlist *evlist,
621                                  union perf_event *event, u64 *id)
622 {
623         const u64 *array = event->sample.array;
624         ssize_t n;
625
626         n = (event->header.size - sizeof(event->header)) >> 3;
627
628         if (event->header.type == PERF_RECORD_SAMPLE) {
629                 if (evlist->id_pos >= n)
630                         return -1;
631                 *id = array[evlist->id_pos];
632         } else {
633                 if (evlist->is_pos > n)
634                         return -1;
635                 n -= evlist->is_pos;
636                 *id = array[n];
637         }
638         return 0;
639 }
640
641 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
642                                             union perf_event *event)
643 {
644         struct perf_evsel *first = perf_evlist__first(evlist);
645         struct hlist_head *head;
646         struct perf_sample_id *sid;
647         int hash;
648         u64 id;
649
650         if (evlist->nr_entries == 1)
651                 return first;
652
653         if (!first->attr.sample_id_all &&
654             event->header.type != PERF_RECORD_SAMPLE)
655                 return first;
656
657         if (perf_evlist__event2id(evlist, event, &id))
658                 return NULL;
659
660         /* Synthesized events have an id of zero */
661         if (!id)
662                 return first;
663
664         hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
665         head = &evlist->heads[hash];
666
667         hlist_for_each_entry(sid, head, node) {
668                 if (sid->id == id)
669                         return sid->evsel;
670         }
671         return NULL;
672 }
673
674 static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
675 {
676         int i;
677
678         if (!evlist->backward_mmap)
679                 return 0;
680
681         for (i = 0; i < evlist->nr_mmaps; i++) {
682                 int fd = evlist->backward_mmap[i].fd;
683                 int err;
684
685                 if (fd < 0)
686                         continue;
687                 err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
688                 if (err)
689                         return err;
690         }
691         return 0;
692 }
693
694 static int perf_evlist__pause(struct perf_evlist *evlist)
695 {
696         return perf_evlist__set_paused(evlist, true);
697 }
698
699 static int perf_evlist__resume(struct perf_evlist *evlist)
700 {
701         return perf_evlist__set_paused(evlist, false);
702 }
703
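/*
 * Ring buffer geometry, as used below: the data area starts one page after
 * md->base (the first page is the perf_event_mmap_page header), its size is
 * md->mask + 1 (a power of two), and positions wrap via '& md->mask'.
 */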
704 /* When check_messup is true, 'end' must point to a good entry */
705 static union perf_event *
706 perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
707                 u64 end, u64 *prev)
708 {
709         unsigned char *data = md->base + page_size;
710         union perf_event *event = NULL;
711         int diff = end - start;
712
713         if (check_messup) {
714                 /*
715                  * If we're further behind than half the buffer, there's a chance
716                  * the writer will bite our tail and mess up the samples under us.
717                  *
718                  * If we somehow ended up ahead of the 'end', we got messed up.
719                  *
720                  * In either case, truncate and restart at 'end'.
721                  */
722                 if (diff > md->mask / 2 || diff < 0) {
723                         fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
724
725                         /*
726                          * 'end' points to a known good entry, start there.
727                          */
728                         start = end;
729                         diff = 0;
730                 }
731         }
732
733         if (diff >= (int)sizeof(event->header)) {
734                 size_t size;
735
736                 event = (union perf_event *)&data[start & md->mask];
737                 size = event->header.size;
738
739                 if (size < sizeof(event->header) || diff < (int)size) {
740                         event = NULL;
741                         goto broken_event;
742                 }
743
744                 /*
745                  * Event straddles the mmap boundary -- header should always
746                  * be inside due to u64 alignment of output.
747                  */
748                 if ((start & md->mask) + size != ((start + size) & md->mask)) {
749                         unsigned int offset = start;
750                         unsigned int len = min(sizeof(*event), size), cpy;
751                         void *dst = md->event_copy;
752
753                         do {
754                                 cpy = min(md->mask + 1 - (offset & md->mask), len);
755                                 memcpy(dst, &data[offset & md->mask], cpy);
756                                 offset += cpy;
757                                 dst += cpy;
758                                 len -= cpy;
759                         } while (len);
760
761                         event = (union perf_event *) md->event_copy;
762                 }
763
764                 start += size;
765         }
766
767 broken_event:
768         if (prev)
769                 *prev = start;
770
771         return event;
772 }
773
774 union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup)
775 {
776         u64 head;
777         u64 old = md->prev;
778
779         /*
780          * Check if event was unmapped due to a POLLHUP/POLLERR.
781          */
782         if (!refcount_read(&md->refcnt))
783                 return NULL;
784
785         head = perf_mmap__read_head(md);
786
787         return perf_mmap__read(md, check_messup, old, head, &md->prev);
788 }
789
790 union perf_event *
791 perf_mmap__read_backward(struct perf_mmap *md)
792 {
793         u64 head, end;
794         u64 start = md->prev;
795
796         /*
797          * Check if event was unmapped due to a POLLHUP/POLLERR.
798          */
799         if (!refcount_read(&md->refcnt))
800                 return NULL;
801
802         head = perf_mmap__read_head(md);
803         if (!head)
804                 return NULL;
805
806         /*
807          * The 'head' pointer starts at 0 and the kernel subtracts
808          * sizeof(record) from it on every write, so in fact 'head' is
809          * negative. The 'end' pointer is built manually by adding the
810          * size of the ring buffer to 'head', which means the valid data
811          * we can read spans the whole ring buffer. If 'end' is positive,
812          * the ring buffer has not been completely filled, so we must
813          * adjust 'end' to 0.
814          *
815          * However, since both 'head' and 'end' are unsigned, we can't
816          * simply compare 'end' against 0. Instead we compare '-head' with
817          * the ring buffer size: '-head' is the number of bytes written.
818          */
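        /*
         * Example (a minimal sketch): with mask + 1 == 0x10000, after the
         * kernel has written 0x30 bytes backward, head == (u64)-0x30, so
         * -head == 0x30 < 0x10000 and 'end' is clamped to 0.  Once the ring
         * has wrapped, -head >= 0x10000 and 'end' becomes head + 0x10000.
         */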
819         if (-head < (u64)(md->mask + 1))
820                 end = 0;
821         else
822                 end = head + md->mask + 1;
823
824         return perf_mmap__read(md, false, start, end, &md->prev);
825 }
826
827 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
828 {
829         struct perf_mmap *md = &evlist->mmap[idx];
830
831         /*
832          * Checking for messup is required for a forward overwritable ring
833          * buffer: the memory pointed to by md->prev can be overwritten in
834          * that case. It is not needed for a read-write ring buffer: the
835          * kernel stops outputting once it hits md->prev (perf_mmap__consume()).
836          */
837         return perf_mmap__read_forward(md, evlist->overwrite);
838 }
839
840 union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
841 {
842         struct perf_mmap *md = &evlist->mmap[idx];
843
844         /*
845          * No need to check for messup in a backward ring buffer:
846          * we can always read arbitrarily long data from it, as long
847          * as we remember to pause it before reading.
848          */
849         return perf_mmap__read_backward(md);
850 }
851
852 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
853 {
854         return perf_evlist__mmap_read_forward(evlist, idx);
855 }
856
857 void perf_mmap__read_catchup(struct perf_mmap *md)
858 {
859         u64 head;
860
861         if (!refcount_read(&md->refcnt))
862                 return;
863
864         head = perf_mmap__read_head(md);
865         md->prev = head;
866 }
867
868 void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
869 {
870         perf_mmap__read_catchup(&evlist->mmap[idx]);
871 }
872
873 static bool perf_mmap__empty(struct perf_mmap *md)
874 {
875         return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
876 }
877
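/*
 * perf_mmap__get()/perf_mmap__put() manage the map's refcount: the mmap is
 * torn down (perf_mmap__munmap()) only when the last reference is dropped,
 * see the refcount comments in perf_evlist__alloc_mmap() and perf_mmap__mmap().
 */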
878 static void perf_mmap__get(struct perf_mmap *map)
879 {
880         refcount_inc(&map->refcnt);
881 }
882
883 static void perf_mmap__put(struct perf_mmap *md)
884 {
885         BUG_ON(md->base && refcount_read(&md->refcnt) == 0);
886
887         if (refcount_dec_and_test(&md->refcnt))
888                 perf_mmap__munmap(md);
889 }
890
891 void perf_mmap__consume(struct perf_mmap *md, bool overwrite)
892 {
893         if (!overwrite) {
894                 u64 old = md->prev;
895
896                 perf_mmap__write_tail(md, old);
897         }
898
899         if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md))
900                 perf_mmap__put(md);
901 }
902
903 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
904 {
905         perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite);
906 }
907
908 int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
909                                struct auxtrace_mmap_params *mp __maybe_unused,
910                                void *userpg __maybe_unused,
911                                int fd __maybe_unused)
912 {
913         return 0;
914 }
915
916 void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
917 {
918 }
919
920 void __weak auxtrace_mmap_params__init(
921                         struct auxtrace_mmap_params *mp __maybe_unused,
922                         off_t auxtrace_offset __maybe_unused,
923                         unsigned int auxtrace_pages __maybe_unused,
924                         bool auxtrace_overwrite __maybe_unused)
925 {
926 }
927
928 void __weak auxtrace_mmap_params__set_idx(
929                         struct auxtrace_mmap_params *mp __maybe_unused,
930                         struct perf_evlist *evlist __maybe_unused,
931                         int idx __maybe_unused,
932                         bool per_cpu __maybe_unused)
933 {
934 }
935
936 static void perf_mmap__munmap(struct perf_mmap *map)
937 {
938         if (map->base != NULL) {
939                 munmap(map->base, perf_mmap__mmap_len(map));
940                 map->base = NULL;
941                 map->fd = -1;
942                 refcount_set(&map->refcnt, 0);
943         }
944         auxtrace_mmap__munmap(&map->auxtrace_mmap);
945 }
946
947 static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
948 {
949         int i;
950
951         if (evlist->mmap)
952                 for (i = 0; i < evlist->nr_mmaps; i++)
953                         perf_mmap__munmap(&evlist->mmap[i]);
954
955         if (evlist->backward_mmap)
956                 for (i = 0; i < evlist->nr_mmaps; i++)
957                         perf_mmap__munmap(&evlist->backward_mmap[i]);
958 }
959
960 void perf_evlist__munmap(struct perf_evlist *evlist)
961 {
962         perf_evlist__munmap_nofree(evlist);
963         zfree(&evlist->mmap);
964         zfree(&evlist->backward_mmap);
965 }
966
967 static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
968 {
969         int i;
970         struct perf_mmap *map;
971
972         evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
973         if (cpu_map__empty(evlist->cpus))
974                 evlist->nr_mmaps = thread_map__nr(evlist->threads);
975         map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
976         if (!map)
977                 return NULL;
978
979         for (i = 0; i < evlist->nr_mmaps; i++) {
980                 map[i].fd = -1;
981                 /*
982                  * When the perf_mmap() call is made we grab one refcount, plus
983                  * one extra to let perf_evlist__mmap_consume() get the last
984                  * events after all real references (perf_mmap__get()) are
985                  * dropped.
986                  *
987                  * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
988                  * thus does perf_mmap__get() on it.
989                  */
990                 refcount_set(&map[i].refcnt, 0);
991         }
992         return map;
993 }
994
995 struct mmap_params {
996         int prot;
997         int mask;
998         struct auxtrace_mmap_params auxtrace_mp;
999 };
1000
1001 static int perf_mmap__mmap(struct perf_mmap *map,
1002                            struct mmap_params *mp, int fd)
1003 {
1004         /*
1005          * The last one will be done at perf_evlist__mmap_consume(), so that we
1006          * make sure we don't prevent tools from consuming every last event in
1007          * the ring buffer.
1008          *
1009          * I.e. we can get the POLLHUP meaning that the fd doesn't exist
1010          * anymore, but the last events for it are still in the ring buffer,
1011          * waiting to be consumed.
1012          *
1013          * Tools can choose to ignore this at their own discretion, but the
1014          * evlist layer can't just drop it when filtering events in
1015          * perf_evlist__filter_pollfd().
1016          */
1017         refcount_set(&map->refcnt, 2);
1018         map->prev = 0;
1019         map->mask = mp->mask;
1020         map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
1021                          MAP_SHARED, fd, 0);
1022         if (map->base == MAP_FAILED) {
1023                 pr_debug2("failed to mmap perf event ring buffer, error %d\n",
1024                           errno);
1025                 map->base = NULL;
1026                 return -1;
1027         }
1028         map->fd = fd;
1029
1030         if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
1031                                 &mp->auxtrace_mp, map->base, fd))
1032                 return -1;
1033
1034         return 0;
1035 }
1036
1037 static bool
1038 perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
1039                          struct perf_evsel *evsel)
1040 {
1041         if (evsel->attr.write_backward)
1042                 return false;
1043         return true;
1044 }
1045
1046 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
1047                                        struct mmap_params *mp, int cpu_idx,
1048                                        int thread, int *_output, int *_output_backward)
1049 {
1050         struct perf_evsel *evsel;
1051         int revent;
1052         int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);
1053
1054         evlist__for_each_entry(evlist, evsel) {
1055                 struct perf_mmap *maps = evlist->mmap;
1056                 int *output = _output;
1057                 int fd;
1058                 int cpu;
1059
1060                 if (evsel->attr.write_backward) {
1061                         output = _output_backward;
1062                         maps = evlist->backward_mmap;
1063
1064                         if (!maps) {
1065                                 maps = perf_evlist__alloc_mmap(evlist);
1066                                 if (!maps)
1067                                         return -1;
1068                                 evlist->backward_mmap = maps;
1069                                 if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
1070                                         perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
1071                         }
1072                 }
1073
1074                 if (evsel->system_wide && thread)
1075                         continue;
1076
1077                 cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
1078                 if (cpu == -1)
1079                         continue;
1080
1081                 fd = FD(evsel, cpu, thread);
1082
1083                 if (*output == -1) {
1084                         *output = fd;
1085
1086                         if (perf_mmap__mmap(&maps[idx], mp, *output)  < 0)
1087                                 return -1;
1088                 } else {
1089                         if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
1090                                 return -1;
1091
1092                         perf_mmap__get(&maps[idx]);
1093                 }
1094
1095                 revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;
1096
1097                 /*
1098                  * The system_wide flag causes a selected event to always be
1099                  * opened without a pid.  Consequently it will never get a
1100                  * POLLHUP, but it is used for tracking in combination with
1101                  * other events, so it should not need to be polled anyway.
1102                  * Therefore don't add it for polling.
1103                  */
1104                 if (!evsel->system_wide &&
1105                     __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
1106                         perf_mmap__put(&maps[idx]);
1107                         return -1;
1108                 }
1109
1110                 if (evsel->attr.read_format & PERF_FORMAT_ID) {
1111                         if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
1112                                                    fd) < 0)
1113                                 return -1;
1114                         perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
1115                                                  thread);
1116                 }
1117         }
1118
1119         return 0;
1120 }
1121
1122 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
1123                                      struct mmap_params *mp)
1124 {
1125         int cpu, thread;
1126         int nr_cpus = cpu_map__nr(evlist->cpus);
1127         int nr_threads = thread_map__nr(evlist->threads);
1128
1129         pr_debug2("perf event ring buffer mmapped per cpu\n");
1130         for (cpu = 0; cpu < nr_cpus; cpu++) {
1131                 int output = -1;
1132                 int output_backward = -1;
1133
1134                 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
1135                                               true);
1136
1137                 for (thread = 0; thread < nr_threads; thread++) {
1138                         if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
1139                                                         thread, &output, &output_backward))
1140                                 goto out_unmap;
1141                 }
1142         }
1143
1144         return 0;
1145
1146 out_unmap:
1147         perf_evlist__munmap_nofree(evlist);
1148         return -1;
1149 }
1150
1151 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
1152                                         struct mmap_params *mp)
1153 {
1154         int thread;
1155         int nr_threads = thread_map__nr(evlist->threads);
1156
1157         pr_debug2("perf event ring buffer mmapped per thread\n");
1158         for (thread = 0; thread < nr_threads; thread++) {
1159                 int output = -1;
1160                 int output_backward = -1;
1161
1162                 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
1163                                               false);
1164
1165                 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
1166                                                 &output, &output_backward))
1167                         goto out_unmap;
1168         }
1169
1170         return 0;
1171
1172 out_unmap:
1173         perf_evlist__munmap_nofree(evlist);
1174         return -1;
1175 }
1176
1177 unsigned long perf_event_mlock_kb_in_pages(void)
1178 {
1179         unsigned long pages;
1180         int max;
1181
1182         if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
1183                 /*
1184                  * Pick a value that was reasonable once upon a time: things
1185                  * look strange since we can't read the sysctl value, but let's
1186                  * not die just yet...
1187                  */
1188                 max = 512;
1189         } else {
1190                 max -= (page_size / 1024);
1191         }
1192
1193         pages = (max * 1024) / page_size;
1194         if (!is_power_of_2(pages))
1195                 pages = rounddown_pow_of_two(pages);
1196
1197         return pages;
1198 }
1199
1200 size_t perf_evlist__mmap_size(unsigned long pages)
1201 {
1202         if (pages == UINT_MAX)
1203                 pages = perf_event_mlock_kb_in_pages();
1204         else if (!is_power_of_2(pages))
1205                 return 0;
1206
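        /* +1: the perf_event_mmap_page header page precedes the data pages. */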
1207         return (pages + 1) * page_size;
1208 }
1209
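/*
 * Parse a --mmap_pages/-m style argument: either a size with a B/K/M/G
 * suffix (converted to a page count) or a raw number of pages; non-power-of-2
 * values are rounded up.  E.g. with 4KiB pages, "128K" yields 32 pages.
 */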
1210 static long parse_pages_arg(const char *str, unsigned long min,
1211                             unsigned long max)
1212 {
1213         unsigned long pages, val;
1214         static struct parse_tag tags[] = {
1215                 { .tag  = 'B', .mult = 1       },
1216                 { .tag  = 'K', .mult = 1 << 10 },
1217                 { .tag  = 'M', .mult = 1 << 20 },
1218                 { .tag  = 'G', .mult = 1 << 30 },
1219                 { .tag  = 0 },
1220         };
1221
1222         if (str == NULL)
1223                 return -EINVAL;
1224
1225         val = parse_tag_value(str, tags);
1226         if (val != (unsigned long) -1) {
1227                 /* we got file size value */
1228                 pages = PERF_ALIGN(val, page_size) / page_size;
1229         } else {
1230                 /* we got pages count value */
1231                 char *eptr;
1232                 pages = strtoul(str, &eptr, 10);
1233                 if (*eptr != '\0')
1234                         return -EINVAL;
1235         }
1236
1237         if (pages == 0 && min == 0) {
1238                 /* leave number of pages at 0 */
1239         } else if (!is_power_of_2(pages)) {
1240                 char buf[100];
1241
1242                 /* round pages up to next power of 2 */
1243                 pages = roundup_pow_of_two(pages);
1244                 if (!pages)
1245                         return -EINVAL;
1246
1247                 unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
1248                 pr_info("rounding mmap pages size to %s (%lu pages)\n",
1249                         buf, pages);
1250         }
1251
1252         if (pages > max)
1253                 return -EINVAL;
1254
1255         return pages;
1256 }
1257
1258 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
1259 {
1260         unsigned long max = UINT_MAX;
1261         long pages;
1262
1263         if (max > SIZE_MAX / page_size)
1264                 max = SIZE_MAX / page_size;
1265
1266         pages = parse_pages_arg(str, 1, max);
1267         if (pages < 0) {
1268                 pr_err("Invalid argument for --mmap_pages/-m\n");
1269                 return -1;
1270         }
1271
1272         *mmap_pages = pages;
1273         return 0;
1274 }
1275
1276 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
1277                                   int unset __maybe_unused)
1278 {
1279         return __perf_evlist__parse_mmap_pages(opt->value, str);
1280 }
1281
1282 /**
1283  * perf_evlist__mmap_ex - Create mmaps to receive events.
1284  * @evlist: list of events
1285  * @pages: map length in pages
1286  * @overwrite: overwrite older events?
1287  * @auxtrace_pages - auxtrace map length in pages
1288  * @auxtrace_overwrite - overwrite older auxtrace data?
1289  *
1290  * If @overwrite is %false the user needs to signal event consumption using
1291  * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
1292  * automatically.
1293  *
1294  * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
1295  * consumption using auxtrace_mmap__write_tail().
1296  *
1297  * Return: %0 on success, negative error code otherwise.
1298  */
1299 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1300                          bool overwrite, unsigned int auxtrace_pages,
1301                          bool auxtrace_overwrite)
1302 {
1303         struct perf_evsel *evsel;
1304         const struct cpu_map *cpus = evlist->cpus;
1305         const struct thread_map *threads = evlist->threads;
1306         struct mmap_params mp = {
1307                 .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
1308         };
1309
1310         if (!evlist->mmap)
1311                 evlist->mmap = perf_evlist__alloc_mmap(evlist);
1312         if (!evlist->mmap)
1313                 return -ENOMEM;
1314
1315         if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
1316                 return -ENOMEM;
1317
1318         evlist->overwrite = overwrite;
1319         evlist->mmap_len = perf_evlist__mmap_size(pages);
1320         pr_debug("mmap size %zuB\n", evlist->mmap_len);
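        /*
         * mmap_len includes the header page, so the data area is
         * mmap_len - page_size bytes and the wrap-around mask is that minus one.
         */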
1321         mp.mask = evlist->mmap_len - page_size - 1;
1322
1323         auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
1324                                    auxtrace_pages, auxtrace_overwrite);
1325
1326         evlist__for_each_entry(evlist, evsel) {
1327                 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
1328                     evsel->sample_id == NULL &&
1329                     perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
1330                         return -ENOMEM;
1331         }
1332
1333         if (cpu_map__empty(cpus))
1334                 return perf_evlist__mmap_per_thread(evlist, &mp);
1335
1336         return perf_evlist__mmap_per_cpu(evlist, &mp);
1337 }
1338
1339 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
1340                       bool overwrite)
1341 {
1342         return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
1343 }
1344
1345 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
1346 {
1347         struct cpu_map *cpus;
1348         struct thread_map *threads;
1349
1350         threads = thread_map__new_str(target->pid, target->tid, target->uid);
1351
1352         if (!threads)
1353                 return -1;
1354
1355         if (target__uses_dummy_map(target))
1356                 cpus = cpu_map__dummy_new();
1357         else
1358                 cpus = cpu_map__new(target->cpu_list);
1359
1360         if (!cpus)
1361                 goto out_delete_threads;
1362
1363         evlist->has_user_cpus = !!target->cpu_list;
1364
1365         perf_evlist__set_maps(evlist, cpus, threads);
1366
1367         return 0;
1368
1369 out_delete_threads:
1370         thread_map__put(threads);
1371         return -1;
1372 }
1373
1374 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
1375                            struct thread_map *threads)
1376 {
1377         /*
1378          * Allow for the possibility that one or another of the maps isn't being
1379          * changed i.e. don't put it.  Note we are assuming the maps that are
1380          * being applied are brand new and evlist is taking ownership of the
1381          * original reference count of 1.  If that is not the case it is up to
1382          * the caller to increase the reference count.
1383          */
1384         if (cpus != evlist->cpus) {
1385                 cpu_map__put(evlist->cpus);
1386                 evlist->cpus = cpu_map__get(cpus);
1387         }
1388
1389         if (threads != evlist->threads) {
1390                 thread_map__put(evlist->threads);
1391                 evlist->threads = thread_map__get(threads);
1392         }
1393
1394         perf_evlist__propagate_maps(evlist);
1395 }
1396
1397 void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
1398                                    enum perf_event_sample_format bit)
1399 {
1400         struct perf_evsel *evsel;
1401
1402         evlist__for_each_entry(evlist, evsel)
1403                 __perf_evsel__set_sample_bit(evsel, bit);
1404 }
1405
1406 void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
1407                                      enum perf_event_sample_format bit)
1408 {
1409         struct perf_evsel *evsel;
1410
1411         evlist__for_each_entry(evlist, evsel)
1412                 __perf_evsel__reset_sample_bit(evsel, bit);
1413 }
1414
1415 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
1416 {
1417         struct perf_evsel *evsel;
1418         int err = 0;
1419         const int ncpus = cpu_map__nr(evlist->cpus),
1420                   nthreads = thread_map__nr(evlist->threads);
1421
1422         evlist__for_each_entry(evlist, evsel) {
1423                 if (evsel->filter == NULL)
1424                         continue;
1425
1426                 /*
1427                  * Filters only work for tracepoint events, which don't have a cpu
1428                  * limit, so the evlist and evsel cpu/thread maps should always match.
1429                  */
1430                 err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
1431                 if (err) {
1432                         *err_evsel = evsel;
1433                         break;
1434                 }
1435         }
1436
1437         return err;
1438 }
1439
1440 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
1441 {
1442         struct perf_evsel *evsel;
1443         int err = 0;
1444
1445         evlist__for_each_entry(evlist, evsel) {
1446                 if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
1447                         continue;
1448
1449                 err = perf_evsel__set_filter(evsel, filter);
1450                 if (err)
1451                         break;
1452         }
1453
1454         return err;
1455 }
1456
1457 int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
1458 {
1459         char *filter;
1460         int ret = -1;
1461         size_t i;
1462
1463         for (i = 0; i < npids; ++i) {
1464                 if (i == 0) {
1465                         if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
1466                                 return -1;
1467                 } else {
1468                         char *tmp;
1469
1470                         if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
1471                                 goto out_free;
1472
1473                         free(filter);
1474                         filter = tmp;
1475                 }
1476         }
1477
1478         ret = perf_evlist__set_filter(evlist, filter);
1479 out_free:
1480         free(filter);
1481         return ret;
1482 }
1483
1484 int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
1485 {
1486         return perf_evlist__set_filter_pids(evlist, 1, &pid);
1487 }
1488
1489 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
1490 {
1491         struct perf_evsel *pos;
1492
1493         if (evlist->nr_entries == 1)
1494                 return true;
1495
1496         if (evlist->id_pos < 0 || evlist->is_pos < 0)
1497                 return false;
1498
1499         evlist__for_each_entry(evlist, pos) {
1500                 if (pos->id_pos != evlist->id_pos ||
1501                     pos->is_pos != evlist->is_pos)
1502                         return false;
1503         }
1504
1505         return true;
1506 }
1507
1508 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1509 {
1510         struct perf_evsel *evsel;
1511
1512         if (evlist->combined_sample_type)
1513                 return evlist->combined_sample_type;
1514
1515         evlist__for_each_entry(evlist, evsel)
1516                 evlist->combined_sample_type |= evsel->attr.sample_type;
1517
1518         return evlist->combined_sample_type;
1519 }
1520
1521 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1522 {
1523         evlist->combined_sample_type = 0;
1524         return __perf_evlist__combined_sample_type(evlist);
1525 }
1526
1527 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
1528 {
1529         struct perf_evsel *evsel;
1530         u64 branch_type = 0;
1531
1532         evlist__for_each_entry(evlist, evsel)
1533                 branch_type |= evsel->attr.branch_sample_type;
1534         return branch_type;
1535 }
1536
1537 bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
1538 {
1539         struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1540         u64 read_format = first->attr.read_format;
1541         u64 sample_type = first->attr.sample_type;
1542
1543         evlist__for_each_entry(evlist, pos) {
1544                 if (read_format != pos->attr.read_format)
1545                         return false;
1546         }
1547
1548         /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
1549         if ((sample_type & PERF_SAMPLE_READ) &&
1550             !(read_format & PERF_FORMAT_ID)) {
1551                 return false;
1552         }
1553
1554         return true;
1555 }
1556
1557 u64 perf_evlist__read_format(struct perf_evlist *evlist)
1558 {
1559         struct perf_evsel *first = perf_evlist__first(evlist);
1560         return first->attr.read_format;
1561 }
1562
1563 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
1564 {
1565         struct perf_evsel *first = perf_evlist__first(evlist);
1566         struct perf_sample *data;
1567         u64 sample_type;
1568         u16 size = 0;
1569
1570         if (!first->attr.sample_id_all)
1571                 goto out;
1572
1573         sample_type = first->attr.sample_type;
1574
1575         if (sample_type & PERF_SAMPLE_TID)
1576                 size += sizeof(data->tid) * 2;
1577
1578         if (sample_type & PERF_SAMPLE_TIME)
1579                 size += sizeof(data->time);
1580
1581         if (sample_type & PERF_SAMPLE_ID)
1582                 size += sizeof(data->id);
1583
1584         if (sample_type & PERF_SAMPLE_STREAM_ID)
1585                 size += sizeof(data->stream_id);
1586
1587         if (sample_type & PERF_SAMPLE_CPU)
1588                 size += sizeof(data->cpu) * 2;
1589
1590         if (sample_type & PERF_SAMPLE_IDENTIFIER)
1591                 size += sizeof(data->id);
1592 out:
1593         return size;
1594 }
1595
1596 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
1597 {
1598         struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1599
1600         evlist__for_each_entry_continue(evlist, pos) {
1601                 if (first->attr.sample_id_all != pos->attr.sample_id_all)
1602                         return false;
1603         }
1604
1605         return true;
1606 }
1607
1608 bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
1609 {
1610         struct perf_evsel *first = perf_evlist__first(evlist);
1611         return first->attr.sample_id_all;
1612 }
1613
1614 void perf_evlist__set_selected(struct perf_evlist *evlist,
1615                                struct perf_evsel *evsel)
1616 {
1617         evlist->selected = evsel;
1618 }
1619
1620 void perf_evlist__close(struct perf_evlist *evlist)
1621 {
1622         struct perf_evsel *evsel;
1623         int ncpus = cpu_map__nr(evlist->cpus);
1624         int nthreads = thread_map__nr(evlist->threads);
1625
1626         evlist__for_each_entry_reverse(evlist, evsel) {
1627                 int n = evsel->cpus ? evsel->cpus->nr : ncpus;
1628                 perf_evsel__close(evsel, n, nthreads);
1629         }
1630 }
1631
1632 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
1633 {
1634         struct cpu_map    *cpus;
1635         struct thread_map *threads;
1636         int err = -ENOMEM;
1637
1638         /*
1639          * Try reading /sys/devices/system/cpu/online to get
1640          * an all cpus map.
1641          *
1642          * FIXME: -ENOMEM is the best we can do here, the cpu_map
1643          * code needs an overhaul to properly forward the
1644          * error, and we may not want to do that fallback to a
1645          * default cpu identity map :-\
1646          */
1647         cpus = cpu_map__new(NULL);
1648         if (!cpus)
1649                 goto out;
1650
1651         threads = thread_map__new_dummy();
1652         if (!threads)
1653                 goto out_put;
1654
1655         perf_evlist__set_maps(evlist, cpus, threads);
1656         return 0;
1657 out_put:
1658         cpu_map__put(cpus);
1659 out:
1660         return err;
1661 }
1662
1663 int perf_evlist__open(struct perf_evlist *evlist)
1664 {
1665         struct perf_evsel *evsel;
1666         int err;
1667
1668         /*
1669          * Default: one fd per CPU, all threads, aka systemwide
1670          * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
1671          */
1672         if (evlist->threads == NULL && evlist->cpus == NULL) {
1673                 err = perf_evlist__create_syswide_maps(evlist);
1674                 if (err < 0)
1675                         goto out_err;
1676         }
1677
1678         perf_evlist__update_id_pos(evlist);
1679
1680         evlist__for_each_entry(evlist, evsel) {
1681                 err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
1682                 if (err < 0)
1683                         goto out_err;
1684         }
1685
1686         return 0;
1687 out_err:
1688         perf_evlist__close(evlist);
1689         errno = -err;
1690         return err;
1691 }
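/*
 * Illustrative use of the open/close pair above (hypothetical caller,
 * error handling trimmed); with no cpu/thread maps set, open falls back
 * to the syswide maps created by perf_evlist__create_syswide_maps():
 *
 *	struct perf_evlist *evlist = perf_evlist__new_default();
 *
 *	if (evlist == NULL)
 *		return -ENOMEM;
 *	if (perf_evlist__open(evlist) < 0)
 *		pr_err("open failed: %s\n", strerror(errno));
 *	...
 *	perf_evlist__delete(evlist);
 */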
1692
1693 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
1694                                   const char *argv[], bool pipe_output,
1695                                   void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
1696 {
1697         int child_ready_pipe[2], go_pipe[2];
1698         char bf;
1699
1700         if (pipe(child_ready_pipe) < 0) {
1701                 perror("failed to create 'ready' pipe");
1702                 return -1;
1703         }
1704
1705         if (pipe(go_pipe) < 0) {
1706                 perror("failed to create 'go' pipe");
1707                 goto out_close_ready_pipe;
1708         }
1709
1710         evlist->workload.pid = fork();
1711         if (evlist->workload.pid < 0) {
1712                 perror("failed to fork");
1713                 goto out_close_pipes;
1714         }
1715
1716         if (!evlist->workload.pid) {
1717                 int ret;
1718
1719                 if (pipe_output)
1720                         dup2(2, 1);
1721
1722                 signal(SIGTERM, SIG_DFL);
1723
1724                 close(child_ready_pipe[0]);
1725                 close(go_pipe[1]);
1726                 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
1727
1728                 /*
1729                  * Tell the parent we're ready to go
1730                  */
1731                 close(child_ready_pipe[1]);
1732
1733                 /*
1734                  * Wait until the parent tells us to go.
1735                  */
1736                 ret = read(go_pipe[0], &bf, 1);
1737                 /*
1738                  * The parent will ask for the execvp() to be performed by
1739          * writing exactly one byte to workload.cork_fd, usually via
1740                  * perf_evlist__start_workload().
1741                  *
1742                  * For cancelling the workload without actually running it,
1743                  * the parent will just close workload.cork_fd, without writing
1744                  * anything, i.e. read will return zero and we just exit()
1745                  * here.
1746                  */
1747                 if (ret != 1) {
1748                         if (ret == -1)
1749                                 perror("unable to read pipe");
1750                         exit(ret);
1751                 }
1752
1753                 execvp(argv[0], (char **)argv);
1754
1755                 if (exec_error) {
1756                         union sigval val;
1757
1758                         val.sival_int = errno;
1759                         if (sigqueue(getppid(), SIGUSR1, val))
1760                                 perror(argv[0]);
1761                 } else
1762                         perror(argv[0]);
1763                 exit(-1);
1764         }
1765
1766         if (exec_error) {
1767                 struct sigaction act = {
1768                         .sa_flags     = SA_SIGINFO,
1769                         .sa_sigaction = exec_error,
1770                 };
1771                 sigaction(SIGUSR1, &act, NULL);
1772         }
1773
1774         if (target__none(target)) {
1775                 if (evlist->threads == NULL) {
1776                         fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
1777                                 __func__, __LINE__);
1778                         goto out_close_pipes;
1779                 }
1780                 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
1781         }
1782
1783         close(child_ready_pipe[1]);
1784         close(go_pipe[0]);
1785         /*
1786          * wait for child to settle
1787          */
1788         if (read(child_ready_pipe[0], &bf, 1) == -1) {
1789                 perror("unable to read pipe");
1790                 goto out_close_pipes;
1791         }
1792
1793         fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
1794         evlist->workload.cork_fd = go_pipe[1];
1795         close(child_ready_pipe[0]);
1796         return 0;
1797
1798 out_close_pipes:
1799         close(go_pipe[0]);
1800         close(go_pipe[1]);
1801 out_close_ready_pipe:
1802         close(child_ready_pipe[0]);
1803         close(child_ready_pipe[1]);
1804         return -1;
1805 }
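/*
 * Sketch of the intended calling sequence (loosely modelled on how the
 * record/stat tools drive a workload; argument values are placeholders):
 *
 *	perf_evlist__prepare_workload(evlist, target, argv, false, NULL);
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, opts->mmap_pages, false);
 *	perf_evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);
 *
 * The forked child blocks in the read() on go_pipe[0] above until
 * perf_evlist__start_workload() writes its single byte to
 * workload.cork_fd, at which point execvp() runs argv.
 */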
1806
1807 int perf_evlist__start_workload(struct perf_evlist *evlist)
1808 {
1809         if (evlist->workload.cork_fd > 0) {
1810                 char bf = 0;
1811                 int ret;
1812                 /*
1813                  * Remove the cork, let it rip!
1814                  */
1815                 ret = write(evlist->workload.cork_fd, &bf, 1);
1816                 if (ret < 0)
1817                         perror("unable to write to pipe");
1818
1819                 close(evlist->workload.cork_fd);
1820                 return ret;
1821         }
1822
1823         return 0;
1824 }
1825
1826 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
1827                               struct perf_sample *sample)
1828 {
1829         struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
1830
1831         if (!evsel)
1832                 return -EFAULT;
1833         return perf_evsel__parse_sample(evsel, event, sample);
1834 }
1835
1836 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
1837 {
1838         struct perf_evsel *evsel;
1839         size_t printed = 0;
1840
1841         evlist__for_each_entry(evlist, evsel) {
1842                 printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
1843                                    perf_evsel__name(evsel));
1844         }
1845
1846         return printed + fprintf(fp, "\n");
1847 }
1848
1849 int perf_evlist__strerror_open(struct perf_evlist *evlist,
1850                                int err, char *buf, size_t size)
1851 {
1852         int printed, value;
1853         char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1854
1855         switch (err) {
1856         case EACCES:
1857         case EPERM:
1858                 printed = scnprintf(buf, size,
1859                                     "Error:\t%s.\n"
1860                                     "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
1861
1862                 value = perf_event_paranoid();
1863
1864                 printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
1865
1866                 if (value >= 2) {
1867                         printed += scnprintf(buf + printed, size - printed,
1868                                              "For your workloads it needs to be <= 1\nHint:\t");
1869                 }
1870                 printed += scnprintf(buf + printed, size - printed,
1871                                      "For system wide tracing it needs to be set to -1.\n");
1872
1873                 printed += scnprintf(buf + printed, size - printed,
1874                                     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
1875                                     "Hint:\tThe current value is %d.", value);
1876                 break;
1877         case EINVAL: {
1878                 struct perf_evsel *first = perf_evlist__first(evlist);
1879                 int max_freq;
1880
1881                 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
1882                         goto out_default;
1883
1884                 if (first->attr.sample_freq < (u64)max_freq)
1885                         goto out_default;
1886
1887                 printed = scnprintf(buf, size,
1888                                     "Error:\t%s.\n"
1889                                     "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
1890                                     "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
1891                                     emsg, max_freq, first->attr.sample_freq);
1892                 break;
1893         }
1894         default:
1895 out_default:
1896                 scnprintf(buf, size, "%s", emsg);
1897                 break;
1898         }
1899
1900         return 0;
1901 }
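/*
 * A hypothetical caller would use this to turn an open failure into a
 * user-facing hint, e.g. (sketch):
 *
 *	char errbuf[BUFSIZ];
 *
 *	if (perf_evlist__open(evlist) < 0) {
 *		perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 *		ui__error("%s\n", errbuf);
 *	}
 */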
1902
1903 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
1904 {
1905         char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1906         int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user = 0, printed = 0;
1907
1908         switch (err) {
1909         case EPERM:
1910                 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1911                 printed += scnprintf(buf + printed, size - printed,
1912                                      "Error:\t%s.\n"
1913                                      "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1914                                      "Hint:\tTried using %zd kB.\n",
1915                                      emsg, pages_max_per_user, pages_attempted);
1916
1917                 if (pages_attempted >= pages_max_per_user) {
1918                         printed += scnprintf(buf + printed, size - printed,
1919                                              "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
1920                                              pages_max_per_user + pages_attempted);
1921                 }
1922
1923                 printed += scnprintf(buf + printed, size - printed,
1924                                      "Hint:\tTry using a smaller -m/--mmap-pages value.");
1925                 break;
1926         default:
1927                 scnprintf(buf, size, "%s", emsg);
1928                 break;
1929         }
1930
1931         return 0;
1932 }
1933
1934 void perf_evlist__to_front(struct perf_evlist *evlist,
1935                            struct perf_evsel *move_evsel)
1936 {
1937         struct perf_evsel *evsel, *n;
1938         LIST_HEAD(move);
1939
1940         if (move_evsel == perf_evlist__first(evlist))
1941                 return;
1942
1943         evlist__for_each_entry_safe(evlist, n, evsel) {
1944                 if (evsel->leader == move_evsel->leader)
1945                         list_move_tail(&evsel->node, &move);
1946         }
1947
1948         list_splice(&move, &evlist->entries);
1949 }
1950
1951 void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
1952                                      struct perf_evsel *tracking_evsel)
1953 {
1954         struct perf_evsel *evsel;
1955
1956         if (tracking_evsel->tracking)
1957                 return;
1958
1959         evlist__for_each_entry(evlist, evsel) {
1960                 if (evsel != tracking_evsel)
1961                         evsel->tracking = false;
1962         }
1963
1964         tracking_evsel->tracking = true;
1965 }
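/*
 * Background: the tracking evsel is the one whose attr gets the
 * mmap/comm/task bits set at config time, i.e. the event that carries the
 * side-band records needed to resolve samples.  Only one event should
 * carry them, hence the exclusive toggle above.
 */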
1966
1967 struct perf_evsel *
1968 perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
1969                                const char *str)
1970 {
1971         struct perf_evsel *evsel;
1972
1973         evlist__for_each_entry(evlist, evsel) {
1974                 if (!evsel->name)
1975                         continue;
1976                 if (strcmp(str, evsel->name) == 0)
1977                         return evsel;
1978         }
1979
1980         return NULL;
1981 }
1982
1983 void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
1984                                   enum bkw_mmap_state state)
1985 {
1986         enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
1987         enum action {
1988                 NONE,
1989                 PAUSE,
1990                 RESUME,
1991         } action = NONE;
1992
1993         if (!evlist->backward_mmap)
1994                 return;
1995
1996         switch (old_state) {
1997         case BKW_MMAP_NOTREADY: {
1998                 if (state != BKW_MMAP_RUNNING)
1999                         goto state_err;
2000                 break;
2001         }
2002         case BKW_MMAP_RUNNING: {
2003                 if (state != BKW_MMAP_DATA_PENDING)
2004                         goto state_err;
2005                 action = PAUSE;
2006                 break;
2007         }
2008         case BKW_MMAP_DATA_PENDING: {
2009                 if (state != BKW_MMAP_EMPTY)
2010                         goto state_err;
2011                 break;
2012         }
2013         case BKW_MMAP_EMPTY: {
2014                 if (state != BKW_MMAP_RUNNING)
2015                         goto state_err;
2016                 action = RESUME;
2017                 break;
2018         }
2019         default:
2020                 WARN_ONCE(1, "Shouldn't get there\n");
2021         }
2022
2023         evlist->bkw_mmap_state = state;
2024
2025         switch (action) {
2026         case PAUSE:
2027                 perf_evlist__pause(evlist);
2028                 break;
2029         case RESUME:
2030                 perf_evlist__resume(evlist);
2031                 break;
2032         case NONE:
2033         default:
2034                 break;
2035         }
2036
2037 state_err:
2038         return;
2039 }
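/*
 * For reference, the transitions accepted above (anything else falls
 * through to state_err and is silently ignored):
 *
 *	NOTREADY     -> RUNNING        no action
 *	RUNNING      -> DATA_PENDING   pause the backward ring buffers
 *	DATA_PENDING -> EMPTY          no action
 *	EMPTY        -> RUNNING        resume the backward ring buffers
 */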