/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <linux/time64.h>

struct switch_output {
        bool             enabled;
        bool             signal;
        unsigned long    size;
        unsigned long    time;
        const char      *str;
        bool             set;
};

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        struct switch_output    switch_output;
        unsigned long long      samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

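/*
 * The helpers below correspond to the three --switch-output modes: on
 * SIGUSR2 (signal), when the output grows past a size threshold (size),
 * or periodically (time). switch_output_setup() further down parses the
 * option string into exactly one of ->signal, ->size or ->time.
 */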
static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

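/*
 * Find the valid [*start, *end) region of a backward ring buffer by
 * walking event headers forward from 'head' until we either wrap past
 * one full buffer of data or hit a zero-sized header. For example, with
 * mask = 0xfff (a 4 KiB buffer) and head = 0x100, the walk starts at
 * offset 0x100 and follows pheader->size hops until evt_head - head
 * reaches 0x1000, rewinding one record if it overshot.
 */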
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading backward ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading backward ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
              u64 *start, u64 *end, bool backward)
{
        if (!backward) {
                *start = old;
                *end = head;
                return 0;
        }

        return backward_rb_find_range(data, mask, head, start, end);
}

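/*
 * Copy the [start, end) region out of the mmap'ed ring buffer into the
 * perf.data file. When the region wraps past the end of the buffer,
 * (start & mask) + size != (end & mask) below, so it is written in two
 * chunks: first from 'start' to the end of the buffer, then from the
 * buffer base up to 'end'.
 */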
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
                  bool overwrite, bool backward)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (rb_find_range(data, md->mask, head,
                          old, &start, &end, backward))
                return -1;

        if (start == end)
                return 0;

        rec->samples++;

        size = end - start;
        if (size > (unsigned long)(md->mask) + 1) {
                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                md->prev = head;
                perf_mmap__consume(md, overwrite || backward);
                return 0;
        }

        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
                start += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[start & md->mask];
        size = end - start;
        start += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_mmap__consume(md, overwrite || backward);
out:
        return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
                int fd = perf_data_file__fd(file);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;
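        /*
         * e.g. len1 + len2 == 11 -> (11 & 7) == 3 -> padding == 5, so the
         * total AUX payload written below is a multiple of 8 bytes.
         */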

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        struct perf_evsel_config_term *err_term;
        int rc = 0;

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
                error("failed to set config \"%s\" on event %s with %d (%s)\n",
                      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
                      str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        rec->samples++;

        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;

        if (file->size == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with a real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory).
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples.
         */
        if (rec->buildid_all)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for the guest kernel, when processing the record & report
         * subcommands we arrange the module mmaps prior to the guest
         * kernel mmap and trigger a preload of the dso, because by
         * default guest module symbols are loaded from guest kallsyms
         * instead of /lib/modules/XXX/XXX. This avoids missing symbols
         * when the first address falls in a module instead of in the
         * guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

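/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic event consumed at report
 * time: everything written before it belongs to a completed round and
 * can safely be sorted by timestamp and flushed (see the ordered_events
 * handling on the tool side).
 */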
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool backward)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;

        if (!evlist)
                return 0;

        maps = backward ? evlist->backward_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

                if (maps[i].base) {
                        if (record__mmap_read(rec, &maps[i],
                                              evlist->overwrite, backward) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

        if (backward)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        int fd = perf_data_file__fd(file);

        if (file->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address,
                                                 rec->opts.proc_map_timeout);
        thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

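/*
 * Rotate the output: finish the current perf.data, then switch to a new
 * file named with the current timestamp (perf_data_file__switch() below
 * appends it to file->path, hence the "Dump %s.%s" message). Tracking
 * events are re-synthesized into the new file so it stays self-contained.
 */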
static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data_file *file = &rec->file;
        int fd, err;

        /* Same size as "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data_file__switch(file, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        file->path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in the evlist, so the newly created perf.data doesn't
                 * contain map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->backward_mmap && evlist->backward_mmap[0].base)
                        return evlist->backward_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data_file *file = &rec->file;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data_file__fd(file);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout);
out:
        return err;
}

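/*
 * Main record loop: open the events, write the file header, synthesize
 * the initial non-sample events, then alternate between draining the
 * mmap ring buffers and polling until 'done' is set by a signal or the
 * workload exits. Snapshot and switch-output triggers are serviced
 * inside the loop.
 */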
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
        signal(SIGSEGV, sigsegv_handler);

        if (rec->opts.record_namespaces)
                tool->namespace_events = true;

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output.enabled)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(file, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data_file__fd(file);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (file->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;
                pid_t tgid;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before the COMM event,
                 * which is emitted during exec(), so perf script
                 * cannot see a correct process name for those events.
                 * Synthesize a COMM event to prevent it.
                 */
                tgid = perf_event__synthesize_comm(tool, event,
                                                   rec->evlist->workload.pid,
                                                   process_synthesized_event,
                                                   machine);
                free(event);

                if (tgid == -1)
                        goto out_child;

                event = malloc(sizeof(event->namespaces) +
                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                               machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Synthesize a NAMESPACES event for the command specified.
                 */
                perf_event__synthesize_namespaces(tool, event,
                                                  rec->evlist->workload.pid,
                                                  tgid, process_synthesized_event,
                                                  machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        perf_hooks__invoke_record_start();
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
                 * here: when done == true and hits != rec->samples in
                 * the previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap ensures we never
                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in
                         * the overwritable ring buffer should have been
                         * collected, so bkw_mmap_state should be set to
                         * BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 is raised during or after
                         * record__mmap_read_all(), it didn't collect
                         * data from the overwritable ring buffer.
                         * Read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Reenable events in the overwrite ring buffer
                         * after record__mmap_read_all(): we should have
                         * collected data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }

                        /* re-arm the alarm */
                        if (rec->switch_output.time)
                                alarm(rec->switch_output.time);
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate error, only if there's any. Ignore positive
                         * number of returned events and interrupt error.
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, at the end events
                 * die with the process and we wait for that. Thus no need to
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        perf_hooks__invoke_record_end();

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
                        file->path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

#define CLOCKID_END     { .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

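/*
 * Parse the clockid option argument; a "CLOCK_" prefix, a bare name from
 * the clockids[] table above, or a raw number are all accepted, e.g.
 * "monotonic_raw", "CLOCK_MONOTONIC" or "4". (In upstream perf this is
 * wired to -k/--clockid; that table entry falls past the end of this
 * listing.)
 */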
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}

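/*
 * Parse the "pages[,pages]" argument of -m/--mmap-pages: the first value
 * sizes the regular data mmaps, the optional second one sizes the AUX
 * area tracing mmaps, e.g.:
 *
 *   perf record -m 512      # 512 data pages
 *   perf record -m 512,128  # plus 128 AUX area pages
 */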
static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}

static void switch_output_size_warn(struct record *rec)
{
        u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;

        wakeup_size /= 2;

        if (s->size < wakeup_size) {
                char buf[100];

                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
                pr_warning("WARNING: switch-output data size lower than "
                           "wakeup kernel buffer size (%s), "
                           "expect bigger perf.data sizes\n", buf);
        }
}

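/*
 * Parse --switch-output[=<mode>]. Accepted forms, matching the tag
 * tables below:
 *
 *   --switch-output=signal   rotate on SIGUSR2
 *   --switch-output=100M     rotate when 100 MB have been written
 *   --switch-output=30s      rotate every 30 seconds
 */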
static int switch_output_setup(struct record *rec)
{
        struct switch_output *s = &rec->switch_output;
        static struct parse_tag tags_size[] = {
                { .tag  = 'B', .mult = 1       },
                { .tag  = 'K', .mult = 1 << 10 },
                { .tag  = 'M', .mult = 1 << 20 },
                { .tag  = 'G', .mult = 1 << 30 },
                { .tag  = 0 },
        };
        static struct parse_tag tags_time[] = {
                { .tag  = 's', .mult = 1        },
                { .tag  = 'm', .mult = 60       },
                { .tag  = 'h', .mult = 60*60    },
                { .tag  = 'd', .mult = 60*60*24 },
                { .tag  = 0 },
        };
        unsigned long val;

        if (!s->set)
                return 0;

        if (!strcmp(s->str, "signal")) {
                s->signal = true;
                pr_debug("switch-output with SIGUSR2 signal\n");
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_size);
        if (val != (unsigned long) -1) {
                s->size = val;
                pr_debug("switch-output with %s size threshold\n", s->str);
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_time);
        if (val != (unsigned long) -1) {
                s->time = val;
                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
                         s->str, s->time);
                goto enabled;
        }

        return -1;

enabled:
        rec->timestamp_filename = true;
        s->enabled              = true;

        if (s->size && !rec->opts.no_buffering)
                switch_output_size_warn(rec);

        return 0;
}

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options needs to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
1509 static struct record record = {
1510         .opts = {
1511                 .sample_time         = true,
1512                 .mmap_pages          = UINT_MAX,
1513                 .user_freq           = UINT_MAX,
1514                 .user_interval       = ULLONG_MAX,
1515                 .freq                = 4000,
1516                 .target              = {
1517                         .uses_mmap   = true,
1518                         .default_per_cpu = true,
1519                 },
1520                 .proc_map_timeout     = 500,
1521         },
1522         .tool = {
1523                 .sample         = process_sample_event,
1524                 .fork           = perf_event__process_fork,
1525                 .exit           = perf_event__process_exit,
1526                 .comm           = perf_event__process_comm,
1527                 .namespaces     = perf_event__process_namespaces,
1528                 .mmap           = perf_event__process_mmap,
1529                 .mmap2          = perf_event__process_mmap2,
1530                 .ordered_events = true,
1531         },
1532 };
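/*
 * Note on the defaults above: user_freq and user_interval use
 * UINT_MAX/ULLONG_MAX as "not set by the user" sentinels, while
 * .freq = 4000 is the sampling frequency (in Hz) used when neither
 * -F nor -c is given on the command line.
 */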
1533
1534 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1535         "\n\t\t\t\tDefault: fp";
1536
1537 static bool dry_run;
1538
1539 /*
1540  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1541  * with it and switch to use the library functions in perf_evlist that came
1542  * from builtin-record.c, i.e. use record_opts,
1543  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
1544  * using pipes, etc.
1545  */
1546 static struct option __record_options[] = {
1547         OPT_CALLBACK('e', "event", &record.evlist, "event",
1548                      "event selector. use 'perf list' to list available events",
1549                      parse_events_option),
1550         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1551                      "event filter", parse_filter),
1552         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1553                            NULL, "don't record events from perf itself",
1554                            exclude_perf),
1555         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1556                     "record events on existing process id"),
1557         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1558                     "record events on existing thread id"),
1559         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1560                     "collect data with this RT SCHED_FIFO priority"),
1561         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1562                     "collect data without buffering"),
1563         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1564                     "collect raw sample records from all opened counters"),
1565         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1566                     "system-wide collection from all CPUs"),
1567         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1568                     "list of cpus to monitor"),
1569         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1570         OPT_STRING('o', "output", &record.file.path, "file",
1571                     "output file name"),
1572         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1573                         &record.opts.no_inherit_set,
1574                         "child tasks do not inherit counters"),
1575         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1576                     "synthesize non-sample events at the end of output"),
1577         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1578         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1579         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1580                      "number of mmap data pages and AUX area tracing mmap pages",
1581                      record__parse_mmap_pages),
1582         OPT_BOOLEAN(0, "group", &record.opts.group,
1583                     "put the counters into a counter group"),
1584         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1585                            NULL, "enables call-graph recording",
1586                            &record_callchain_opt),
1587         OPT_CALLBACK(0, "call-graph", &record.opts,
1588                      "record_mode[,record_size]", record_callchain_help,
1589                      &record_parse_callchain_opt),
1590         OPT_INCR('v', "verbose", &verbose,
1591                     "be more verbose (show counter open errors, etc)"),
1592         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1593         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1594                     "per thread counts"),
1595         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1596         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1597         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1598                         &record.opts.sample_time_set,
1599                         "Record the sample timestamps"),
1600         OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1601         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1602                     "don't sample"),
1603         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1604                         &record.no_buildid_cache_set,
1605                         "do not update the buildid cache"),
1606         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1607                         &record.no_buildid_set,
1608                         "do not collect buildids in perf.data"),
1609         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1610                      "monitor event in cgroup name only",
1611                      parse_cgroups),
1612         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1613                   "ms to wait before starting measurement after program start"),
1614         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1615                    "user to profile"),
1616
1617         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1618                      "branch any", "sample any taken branches",
1619                      parse_branch_stack),
1620
1621         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1622                      "branch filter mask", "branch stack filter modes",
1623                      parse_branch_stack),
1624         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1625                     "sample by weight (on special events only)"),
1626         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1627                     "sample transaction flags (special events only)"),
1628         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1629                     "use per-thread mmaps"),
1630         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1631                     "sample selected machine registers on interrupt,"
1632                     " use -I ? to list register names", parse_regs),
1633         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1634                     "Record running/enabled time of read (:S) events"),
1635         OPT_CALLBACK('k', "clockid", &record.opts,
1636                      "clockid", "clockid to use for events, see clock_gettime()",
1637                      parse_clockid),
1638         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1639                           "opts", "AUX area tracing Snapshot Mode", ""),
1640         OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1641                         "per thread proc mmap processing timeout in ms"),
1642         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1643                     "Record namespaces events"),
1644         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1645                     "Record context switch events"),
1646         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1647                          "Configure all used events to run in kernel space.",
1648                          PARSE_OPT_EXCLUSIVE),
1649         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1650                          "Configure all used events to run in user space.",
1651                          PARSE_OPT_EXCLUSIVE),
1652         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1653                    "clang binary to use for compiling BPF scriptlets"),
1654         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1655                    "options passed to clang when compiling BPF scriptlets"),
1656         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1657                    "file", "vmlinux pathname"),
1658         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1659                     "Record build-id of all DSOs regardless of hits"),
1660         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1661                     "append timestamp to output filename"),
1662         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1663                           &record.switch_output.set, "signal,size,time",
1664                           "Switch output when receiving SIGUSR2 or when crossing a size/time threshold",
1665                           "signal"),
1666         OPT_BOOLEAN(0, "dry-run", &dry_run,
1667                     "Parse options then exit"),
1668         OPT_END()
1669 };
1670
1671 struct option *record_options = __record_options;
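/*
 * Illustrative tie-in between the option table above and a typical
 * invocation:
 *
 *   perf record -e cycles -a -g -o out.data -- sleep 10
 *
 * -e picks the event, -a asks for system-wide collection, -g enables
 * call-graph recording, and -o names the output file.
 */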
1672
1673 int cmd_record(int argc, const char **argv)
1674 {
1675         int err;
1676         struct record *rec = &record;
1677         char errbuf[BUFSIZ];
1678
1679 #ifndef HAVE_LIBBPF_SUPPORT
1680 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1681         set_nobuild('\0', "clang-path", true);
1682         set_nobuild('\0', "clang-opt", true);
1683 # undef set_nobuild
1684 #endif
1685
1686 #ifndef HAVE_BPF_PROLOGUE
1687 # if !defined (HAVE_DWARF_SUPPORT)
1688 #  define REASON  "NO_DWARF=1"
1689 # elif !defined (HAVE_LIBBPF_SUPPORT)
1690 #  define REASON  "NO_LIBBPF=1"
1691 # else
1692 #  define REASON  "this architecture doesn't support BPF prologue"
1693 # endif
1694 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1695         set_nobuild('\0', "vmlinux", true);
1696 # undef set_nobuild
1697 # undef REASON
1698 #endif
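/*
 * Editor's summary (hedged): set_option_nobuild() marks the named option
 * in record_options as compiled out, so that using it reports the REASON
 * string (e.g. "NO_LIBBPF=1") needed to rebuild with support, instead of
 * failing obscurely.
 */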
1699
1700         rec->evlist = perf_evlist__new();
1701         if (rec->evlist == NULL)
1702                 return -ENOMEM;
1703
1704         err = perf_config(perf_record_config, rec);
1705         if (err)
1706                 return err;
1707
1708         argc = parse_options(argc, argv, record_options, record_usage,
1709                             PARSE_OPT_STOP_AT_NON_OPTION);
1710         if (quiet)
1711                 perf_quiet_option();
1712
1713         /* Make system wide (-a) the default target. */
1714         if (!argc && target__none(&rec->opts.target))
1715                 rec->opts.target.system_wide = true;
1716
1717         if (nr_cgroups && !rec->opts.target.system_wide) {
1718                 usage_with_options_msg(record_usage, record_options,
1719                         "cgroup monitoring only available in system-wide mode");
1720         }
1721
1722         if (rec->opts.record_switch_events &&
1723             !perf_can_record_switch_events()) {
1724                 ui__error("kernel does not support recording context switch events\n");
1725                 parse_options_usage(record_usage, record_options, "switch-events", 0);
1726                 return -EINVAL;
1727         }
1728
1729         if (switch_output_setup(rec)) {
1730                 parse_options_usage(record_usage, record_options, "switch-output", 0);
1731                 return -EINVAL;
1732         }
1733
1734         if (rec->switch_output.time) {
1735                 signal(SIGALRM, alarm_sig_handler);
1736                 alarm(rec->switch_output.time);
1737         }
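        /*
         * Timed rotation, e.g. --switch-output=30s: SIGALRM fires after
         * s->time seconds and alarm_sig_handler() (at the bottom of this
         * file) hits switch_output_trigger so the output gets rotated.
         */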
1738
1739         if (!rec->itr) {
1740                 rec->itr = auxtrace_record__init(rec->evlist, &err);
1741                 if (err)
1742                         goto out;
1743         }
1744
1745         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1746                                               rec->opts.auxtrace_snapshot_opts);
1747         if (err)
1748                 goto out;
1749
1750         /*
1751          * Allow aliases to facilitate the lookup of symbols for address
1752          * filters. Refer to auxtrace_parse_filters().
1753          */
1754         symbol_conf.allow_aliases = true;
1755
1756         symbol__init(NULL);
1757
1758         err = auxtrace_parse_filters(rec->evlist);
1759         if (err)
1760                 goto out;
1761
1762         if (dry_run)
1763                 goto out;
1764
1765         err = bpf__setup_stdout(rec->evlist);
1766         if (err) {
1767                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1768                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
1769                        errbuf);
1770                 goto out;
1771         }
1772
1773         err = -ENOMEM;
1774
1775         if (symbol_conf.kptr_restrict)
1776                 pr_warning(
1777 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1778 "check /proc/sys/kernel/kptr_restrict.\n\n"
1779 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1780 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1781 "Samples in kernel modules won't be resolved at all.\n\n"
1782 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1783 "even with a suitable vmlinux or kallsyms file.\n\n");
1784
1785         if (rec->no_buildid_cache || rec->no_buildid) {
1786                 disable_buildid_cache();
1787         } else if (rec->switch_output.enabled) {
1788                 /*
1789                  * In 'perf record --switch-output', disable buildid
1790                  * generation by default to reduce data file switching
1791                  * overhead. Still generate buildids if they are explicitly
1792                  * required, using
1793                  *
1794                  *  perf record --switch-output --no-no-buildid \
1795                  *              --no-no-buildid-cache
1796                  *
1797                  * The following code is equivalent to:
1798                  *
1799                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
1800                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1801                  *         disable_buildid_cache();
1802                  */
1803                 bool disable = true;
1804
1805                 if (rec->no_buildid_set && !rec->no_buildid)
1806                         disable = false;
1807                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1808                         disable = false;
1809                 if (disable) {
1810                         rec->no_buildid = true;
1811                         rec->no_buildid_cache = true;
1812                         disable_buildid_cache();
1813                 }
1814         }
1815
1816         if (rec->opts.overwrite)
1817                 rec->opts.tail_synthesize = true;
1818
1819         if (rec->evlist->nr_entries == 0 &&
1820             perf_evlist__add_default(rec->evlist) < 0) {
1821                 pr_err("Not enough memory for event selector list\n");
1822                 goto out;
1823         }
1824
1825         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1826                 rec->opts.no_inherit = true;
1827
1828         err = target__validate(&rec->opts.target);
1829         if (err) {
1830                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1831                 ui__warning("%s", errbuf);
1832         }
1833
1834         err = target__parse_uid(&rec->opts.target);
1835         if (err) {
1836                 int saved_errno = errno;
1837
1838                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1839                 ui__error("%s", errbuf);
1840
1841                 err = -saved_errno;
1842                 goto out;
1843         }
1844
1845         /* Enable ignoring missing threads when the -u option is given. */
1846         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1847
1848         err = -ENOMEM;
1849         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1850                 usage_with_options(record_usage, record_options);
1851
1852         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1853         if (err)
1854                 goto out;
1855
1856         /*
1857          * We take all buildids when the file contains AUX area
1858          * tracing data, because we do not decode the trace - decoding
1859          * would take too long.
1860          */
1861         if (rec->opts.full_auxtrace)
1862                 rec->buildid_all = true;
1863
1864         if (record_opts__config(&rec->opts)) {
1865                 err = -EINVAL;
1866                 goto out;
1867         }
1868
1869         err = __cmd_record(&record, argc, argv);
1870 out:
1871         perf_evlist__delete(rec->evlist);
1872         symbol__exit();
1873         auxtrace_record__free(rec->itr);
1874         return err;
1875 }
1876
1877 static void snapshot_sig_handler(int sig __maybe_unused)
1878 {
1879         struct record *rec = &record;
1880
1881         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1882                 trigger_hit(&auxtrace_snapshot_trigger);
1883                 auxtrace_record__snapshot_started = 1;
1884                 if (auxtrace_record__snapshot_start(record.itr))
1885                         trigger_error(&auxtrace_snapshot_trigger);
1886         }
1887
1888         if (switch_output_signal(rec))
1889                 trigger_hit(&switch_output_trigger);
1890 }
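/*
 * Note: SIGUSR2 does double duty here - the same handler serves AUX
 * area snapshot mode and --switch-output=signal (see the "switch-output
 * with SIGUSR2 signal" branch in switch_output_setup() above).
 */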
1891
1892 static void alarm_sig_handler(int sig __maybe_unused)
1893 {
1894         struct record *rec = &record;
1895
1896         if (switch_output_time(rec))
1897                 trigger_hit(&switch_output_trigger);
1898 }