/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "asm/bug.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <linux/time64.h>

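/*
 * Configuration for --switch-output, filled in by switch_output_setup():
 * output rotation can be requested by SIGUSR2 ("signal"), by crossing a
 * size threshold in bytes, or on a time interval in seconds.
 */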
struct switch_output {
        bool             enabled;
        bool             signal;
        unsigned long    size;
        unsigned long    time;
        const char      *str;
        bool             set;
};

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        struct switch_output    switch_output;
        unsigned long long      samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
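
/*
 * Both triggers use the small state machine from util/trigger.h:
 * trigger_on() arms a trigger, trigger_ready() marks it as waiting,
 * trigger_hit() fires it (observed via trigger_is_hit()), and
 * trigger_error() parks it in an error state until it is made ready
 * again.
 */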

static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

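/*
 * In a backward (overwrite) ring buffer the kernel writes records from
 * high to low addresses, so the newest record starts at 'head'.  Walk
 * forward through the event headers from 'head' until we either wrap a
 * full buffer size or hit a zero-sized (never written) header; that
 * point becomes *end, giving [*start, *end) as the range to dump.
 */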
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading backward ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading backward ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
              u64 *start, u64 *end, bool backward)
{
        if (!backward) {
                *start = old;
                *end = head;
                return 0;
        }

        return backward_rb_find_range(data, mask, head, start, end);
}

static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
                  bool overwrite, bool backward)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (rb_find_range(data, md->mask, head,
                          old, &start, &end, backward))
                return -1;

        if (start == end)
                return 0;

        rec->samples++;

        size = end - start;
        if (size > (unsigned long)(md->mask) + 1) {
                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                md->prev = head;
                perf_mmap__consume(md, overwrite || backward);
                return 0;
        }

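        /*
         * The byte range may wrap past the end of the ring buffer; if so,
         * write the tail chunk first, then fall through to write the rest
         * from the beginning of the buffer.
         */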
        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
                start += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[start & md->mask];
        size = end - start;
        start += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_mmap__consume(md, overwrite || backward);
out:
        return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
                int fd = perf_data_file__fd(file);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;
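        /* e.g. len1 + len2 == 13 -> padding == 3, making the total 8-byte aligned */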

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        struct perf_evsel_config_term *err_term;
        int rc = 0;

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
                error("failed to set config \"%s\" on event %s with %d (%s)\n",
                      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
                      str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        rec->samples++;

        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;

        if (file->size == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace the
         * dso->long_name with the real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples.
         */
        if (rec->buildid_all)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for the guest kernel when processing the record & report
         * subcommands, we arrange module mmaps prior to the guest kernel
         * mmap and trigger a preload of the dso, because by default guest
         * module symbols are loaded from guest kallsyms instead of
         * /lib/modules/XXX/XXX.  This avoids missing symbols when the
         * first address is in a module instead of in the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
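
/*
 * PERF_RECORD_FINISHED_ROUND is a flush marker: all events written before
 * it belong to one pass over the mmaps, which bounds how far the ordered
 * events code in report/script must look when sorting by timestamp.
 */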

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool backward)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;

        if (!evlist)
                return 0;

        maps = backward ? evlist->backward_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

                if (maps[i].base) {
                        if (record__mmap_read(rec, &maps[i],
                                              evlist->overwrite, backward) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

        if (backward)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        int fd = perf_data_file__fd(file);

        if (file->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);

        return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address,
                                                 rec->opts.proc_map_timeout);
        thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data_file *file = &rec->file;
        int fd, err;

        /* Same size as a real timestamp, e.g. "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data_file__switch(file, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        file->path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in the evlist, which causes the newly created perf.data
                 * to lack map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->backward_mmap && evlist->backward_mmap[0].base)
                        return evlist->backward_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data_file *file = &rec->file;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data_file__fd(file);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout);
out:
        return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
        signal(SIGSEGV, sigsegv_handler);

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output.enabled)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(file, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data_file__fd(file);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (file->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before COMM event
                 * which is emitted during exec(), so perf script
                 * cannot see a correct process name for those events.
                 * Synthesize COMM event to prevent it.
                 */
                perf_event__synthesize_comm(tool, event,
                                            rec->evlist->workload.pid,
                                            process_synthesized_event,
                                            machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        perf_hooks__invoke_record_start();
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
                 * when done == true and hits != rec->samples in the
                 * previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap ensures we never convert
                 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in the
                         * overwritable ring buffer should have been collected,
                         * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 is raised after or during
                         * record__mmap_read_all(), it didn't collect data
                         * from the overwritable ring buffer, so read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Reenable events in overwrite ring buffer after
                         * record__mmap_read_all(): we should have collected
                         * data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }

                        /* re-arm the alarm */
                        if (rec->switch_output.time)
                                alarm(rec->switch_output.time);
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate error, only if there's any. Ignore positive
                         * number of returned events and interrupt error.
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, at the end events
                 * die with the process and we wait for that. Thus no need to
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        perf_hooks__invoke_record_end();

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
                        file->path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}
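
/*
 * These variables map to ~/.perfconfig entries, e.g.:
 *
 *      [record]
 *              build-id = no-cache
 *              call-graph = dwarf
 */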

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

#define CLOCKID_END     { .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}
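
/*
 * So, assuming the --clockid option is wired to parse_clockid(), all of
 * these should select the same clock:
 *
 *      perf record --clockid monotonic_raw ...
 *      perf record --clockid CLOCK_MONOTONIC_RAW ...
 *      perf record --clockid 4 ...
 */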

static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}
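
/*
 * Used by the -m/--mmap-pages option below: "-m 512,64" sizes the data
 * mmap at 512 pages and the AUX area tracing mmap at 64 pages, while a
 * plain "-m 512" leaves the AUX size untouched.
 */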

static void switch_output_size_warn(struct record *rec)
{
        u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;

        wakeup_size /= 2;

        if (s->size < wakeup_size) {
                char buf[100];

                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
                pr_warning("WARNING: switch-output data size lower than "
                           "wakeup kernel buffer size (%s); "
                           "expect bigger perf.data sizes\n", buf);
        }
}

static int switch_output_setup(struct record *rec)
{
        struct switch_output *s = &rec->switch_output;
        static struct parse_tag tags_size[] = {
                { .tag  = 'B', .mult = 1       },
                { .tag  = 'K', .mult = 1 << 10 },
                { .tag  = 'M', .mult = 1 << 20 },
                { .tag  = 'G', .mult = 1 << 30 },
                { .tag  = 0 },
        };
        static struct parse_tag tags_time[] = {
                { .tag  = 's', .mult = 1        },
                { .tag  = 'm', .mult = 60       },
                { .tag  = 'h', .mult = 60*60    },
                { .tag  = 'd', .mult = 60*60*24 },
                { .tag  = 0 },
        };
        unsigned long val;

        if (!s->set)
                return 0;

        if (!strcmp(s->str, "signal")) {
                s->signal = true;
                pr_debug("switch-output with SIGUSR2 signal\n");
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_size);
        if (val != (unsigned long) -1) {
                s->size = val;
                pr_debug("switch-output with %s size threshold\n", s->str);
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_time);
        if (val != (unsigned long) -1) {
                s->time = val;
                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
                         s->str, s->time);
                goto enabled;
        }

        return -1;

enabled:
        rec->timestamp_filename = true;
        s->enabled              = true;

        if (s->size && !rec->opts.no_buffering)
                switch_output_size_warn(rec);

        return 0;
}
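
/*
 * Accepted --switch-output forms, matching the parsers above:
 *
 *      --switch-output=signal    rotate the output file on SIGUSR2
 *      --switch-output=100M      rotate once 100MB have been written
 *      --switch-output=30s       rotate every 30 seconds
 */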

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
1482 static struct record record = {
1483         .opts = {
1484                 .sample_time         = true,
1485                 .mmap_pages          = UINT_MAX,
1486                 .user_freq           = UINT_MAX,
1487                 .user_interval       = ULLONG_MAX,
1488                 .freq                = 4000,
1489                 .target              = {
1490                         .uses_mmap   = true,
1491                         .default_per_cpu = true,
1492                 },
1493                 .proc_map_timeout     = 500,
1494         },
1495         .tool = {
1496                 .sample         = process_sample_event,
1497                 .fork           = perf_event__process_fork,
1498                 .exit           = perf_event__process_exit,
1499                 .comm           = perf_event__process_comm,
1500                 .mmap           = perf_event__process_mmap,
1501                 .mmap2          = perf_event__process_mmap2,
1502                 .ordered_events = true,
1503         },
1504 };
1505
1506 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1507         "\n\t\t\t\tDefault: fp";
1508
1509 static bool dry_run;
1510
1511 /*
1512  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1513  * with it and switch to use the library functions in perf_evlist that came
1514  * from builtin-record.c, i.e. use record_opts,
1515  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1516  * using pipes, etc.
1517  */
1518 static struct option __record_options[] = {
1519         OPT_CALLBACK('e', "event", &record.evlist, "event",
1520                      "event selector. use 'perf list' to list available events",
1521                      parse_events_option),
1522         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1523                      "event filter", parse_filter),
1524         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1525                            NULL, "don't record events from perf itself",
1526                            exclude_perf),
1527         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1528                     "record events on existing process id"),
1529         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1530                     "record events on existing thread id"),
1531         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1532                     "collect data with this RT SCHED_FIFO priority"),
1533         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1534                     "collect data without buffering"),
1535         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1536                     "collect raw sample records from all opened counters"),
1537         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1538                             "system-wide collection from all CPUs"),
1539         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1540                     "list of cpus to monitor"),
1541         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1542         OPT_STRING('o', "output", &record.file.path, "file",
1543                     "output file name"),
1544         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1545                         &record.opts.no_inherit_set,
1546                         "child tasks do not inherit counters"),
1547         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1548                     "synthesize non-sample events at the end of output"),
1549         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1550         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1551         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1552                      "number of mmap data pages and AUX area tracing mmap pages",
1553                      record__parse_mmap_pages),
1554         OPT_BOOLEAN(0, "group", &record.opts.group,
1555                     "put the counters into a counter group"),
1556         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1557                            NULL, "enables call-graph recording",
1558                            &record_callchain_opt),
1559         OPT_CALLBACK(0, "call-graph", &record.opts,
1560                      "record_mode[,record_size]", record_callchain_help,
1561                      &record_parse_callchain_opt),
1562         OPT_INCR('v', "verbose", &verbose,
1563                     "be more verbose (show counter open errors, etc)"),
1564         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1565         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1566                     "per thread counts"),
1567         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1568         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1569         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1570                         &record.opts.sample_time_set,
1571                         "Record the sample timestamps"),
1572         OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1573         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1574                     "don't sample"),
1575         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1576                         &record.no_buildid_cache_set,
1577                         "do not update the buildid cache"),
1578         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1579                         &record.no_buildid_set,
1580                         "do not collect buildids in perf.data"),
1581         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1582                      "monitor event in cgroup name only",
1583                      parse_cgroups),
1584         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1585                   "ms to wait before starting measurement after program start"),
1586         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1587                    "user to profile"),
1588
1589         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1590                      "branch any", "sample any taken branches",
1591                      parse_branch_stack),
1592
1593         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1594                      "branch filter mask", "branch stack filter modes",
1595                      parse_branch_stack),
1596         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1597                     "sample by weight (on special events only)"),
1598         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1599                     "sample transaction flags (special events only)"),
1600         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1601                     "use per-thread mmaps"),
1602         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1603                     "sample selected machine registers on interrupt,"
1604                     " use -I ? to list register names", parse_regs),
1605         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1606                     "Record running/enabled time of read (:S) events"),
1607         OPT_CALLBACK('k', "clockid", &record.opts,
1608                      "clockid", "clockid to use for events, see clock_gettime()",
1609                      parse_clockid),
1610         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1611                           "opts", "AUX area tracing Snapshot Mode", ""),
1612         OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1613                         "per thread proc mmap processing timeout in ms"),
1614         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1615                     "Record context switch events"),
1616         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1617                          "Configure all used events to run in kernel space.",
1618                          PARSE_OPT_EXCLUSIVE),
1619         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1620                          "Configure all used events to run in user space.",
1621                          PARSE_OPT_EXCLUSIVE),
1622         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1623                    "clang binary to use for compiling BPF scriptlets"),
1624         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1625                    "options passed to clang when compiling BPF scriptlets"),
1626         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1627                    "file", "vmlinux pathname"),
1628         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1629                     "Record build-id of all DSOs regardless of hits"),
1630         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1631                     "append timestamp to output filename"),
1632         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1633                           &record.switch_output.set, "signal,size,time",
1634                           "Switch output on SIGUSR2 or when crossing the size or time threshold",
1635                           "signal"),
1636         OPT_BOOLEAN(0, "dry-run", &dry_run,
1637                     "Parse options then exit"),
1638         OPT_END()
1639 };
1640
1641 struct option *record_options = __record_options;
1642
1643 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1644 {
1645         int err;
1646         struct record *rec = &record;
1647         char errbuf[BUFSIZ];
1648
1649 #ifndef HAVE_LIBBPF_SUPPORT
1650 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1651         set_nobuild('\0', "clang-path", true);
1652         set_nobuild('\0', "clang-opt", true);
1653 # undef set_nobuild
1654 #endif
1655
1656 #ifndef HAVE_BPF_PROLOGUE
1657 # if !defined (HAVE_DWARF_SUPPORT)
1658 #  define REASON  "NO_DWARF=1"
1659 # elif !defined (HAVE_LIBBPF_SUPPORT)
1660 #  define REASON  "NO_LIBBPF=1"
1661 # else
1662 #  define REASON  "this architecture doesn't support BPF prologue"
1663 # endif
1664 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1665         set_nobuild('\0', "vmlinux", true);
1666 # undef set_nobuild
1667 # undef REASON
1668 #endif
1669
1670         rec->evlist = perf_evlist__new();
1671         if (rec->evlist == NULL)
1672                 return -ENOMEM;
1673
1674         err = perf_config(perf_record_config, rec);
1675         if (err)
1676                 return err;
1677
1678         argc = parse_options(argc, argv, record_options, record_usage,
1679                             PARSE_OPT_STOP_AT_NON_OPTION);
1680         if (quiet)
1681                 perf_quiet_option();
1682
1683         /* Make system-wide (-a) the default target. */
1684         if (!argc && target__none(&rec->opts.target))
1685                 rec->opts.target.system_wide = true;
1686
1687         if (nr_cgroups && !rec->opts.target.system_wide) {
1688                 usage_with_options_msg(record_usage, record_options,
1689                         "cgroup monitoring only available in system-wide mode");
1690
1691         }
1692         if (rec->opts.record_switch_events &&
1693             !perf_can_record_switch_events()) {
1694                 ui__error("kernel does not support recording context switch events\n");
1695                 parse_options_usage(record_usage, record_options, "switch-events", 0);
1696                 return -EINVAL;
1697         }
1698
1699         if (switch_output_setup(rec)) {
1700                 parse_options_usage(record_usage, record_options, "switch-output", 0);
1701                 return -EINVAL;
1702         }
1703
1704         if (rec->switch_output.time) {
1705                 signal(SIGALRM, alarm_sig_handler);
1706                 alarm(rec->switch_output.time);
1707         }
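
        /*
         * Caveat (an assumption about code outside this excerpt): alarm()
         * arms a one-shot timer, so for periodic output switching the timer
         * presumably has to be re-armed after each SIGALRM, once the main
         * record loop has performed the switch.
         */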
1708
1709         if (!rec->itr) {
1710                 rec->itr = auxtrace_record__init(rec->evlist, &err);
1711                 if (err)
1712                         goto out;
1713         }
1714
1715         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1716                                               rec->opts.auxtrace_snapshot_opts);
1717         if (err)
1718                 goto out;
1719
1720         /*
1721          * Allow aliases to facilitate the lookup of symbols for address
1722          * filters. Refer to auxtrace_parse_filters().
1723          */
1724         symbol_conf.allow_aliases = true;
1725
1726         symbol__init(NULL);
1727
1728         err = auxtrace_parse_filters(rec->evlist);
1729         if (err)
1730                 goto out;
1731
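        /*
         * For illustration (hedged; see perf-record(1) for the exact
         * grammar): the filters parsed above are strings such as
         *
         *   perf record -e intel_pt// --filter 'filter main @ /bin/ls' -- ls
         *
         * where symbols like 'main' are resolved through the symbol code
         * initialized above, which is why allow_aliases is set.
         */
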
1732         if (dry_run)
1733                 goto out;
1734
1735         err = bpf__setup_stdout(rec->evlist);
1736         if (err) {
1737                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1738                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
1739                        errbuf);
1740                 goto out;
1741         }
1742
1743         err = -ENOMEM;
1744
1745         if (symbol_conf.kptr_restrict)
1746                 pr_warning(
1747 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1748 "check /proc/sys/kernel/kptr_restrict.\n\n"
1749 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1750 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1751 "Samples in kernel modules won't be resolved at all.\n\n"
1752 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1753 "even with a suitable vmlinux or kallsyms file.\n\n");
1754
1755         if (rec->no_buildid_cache || rec->no_buildid) {
1756                 disable_buildid_cache();
1757         } else if (rec->switch_output.enabled) {
1758                 /*
1759                  * In 'perf record --switch-output', disable build-id
1760                  * generation by default to reduce the data file switching
1761                  * overhead. Build-ids are still generated if explicitly
1762                  * requested, using
1763                  *
1764                  *  perf record --switch-output --no-no-buildid \
1765                  *              --no-no-buildid-cache
1766                  *
1767                  * The following code is equivalent to:
1768                  *
1769                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
1770                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1771                  *         disable_buildid_cache();
1772                  */
1773                 bool disable = true;
1774
1775                 if (rec->no_buildid_set && !rec->no_buildid)
1776                         disable = false;
1777                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1778                         disable = false;
1779                 if (disable) {
1780                         rec->no_buildid = true;
1781                         rec->no_buildid_cache = true;
1782                         disable_buildid_cache();
1783                 }
1784         }
1785
1786         if (record.opts.overwrite)
1787                 record.opts.tail_synthesize = true;
1788
1789         if (rec->evlist->nr_entries == 0 &&
1790             perf_evlist__add_default(rec->evlist) < 0) {
1791                 pr_err("Not enough memory for event selector list\n");
1792                 goto out;
1793         }
1794
1795         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1796                 rec->opts.no_inherit = true;
1797
1798         err = target__validate(&rec->opts.target);
1799         if (err) {
1800                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1801                 ui__warning("%s", errbuf);
1802         }
1803
1804         err = target__parse_uid(&rec->opts.target);
1805         if (err) {
1806                 int saved_errno = errno;
1807
1808                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1809                 ui__error("%s", errbuf);
1810
1811                 err = -saved_errno;
1812                 goto out;
1813         }
1814
1815         /* Enable ignoring missing threads when the -u option is given. */
1816         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1817
1818         err = -ENOMEM;
1819         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1820                 usage_with_options(record_usage, record_options);
1821
1822         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1823         if (err)
1824                 goto out;
1825
1826         /*
1827          * Take all build-ids when the file contains AUX area tracing
1828          * data, because we do not decode the trace here, as that
1829          * would take too long.
1830          */
1831         if (rec->opts.full_auxtrace)
1832                 rec->buildid_all = true;
1833
1834         if (record_opts__config(&rec->opts)) {
1835                 err = -EINVAL;
1836                 goto out;
1837         }
1838
1839         err = __cmd_record(&record, argc, argv);
1840 out:
1841         perf_evlist__delete(rec->evlist);
1842         symbol__exit();
1843         auxtrace_record__free(rec->itr);
1844         return err;
1845 }
1846
1847 static void snapshot_sig_handler(int sig __maybe_unused)
1848 {
1849         struct record *rec = &record;
1850
1851         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1852                 trigger_hit(&auxtrace_snapshot_trigger);
1853                 auxtrace_record__snapshot_started = 1;
1854                 if (auxtrace_record__snapshot_start(record.itr))
1855                         trigger_error(&auxtrace_snapshot_trigger);
1856         }
1857
1858         if (switch_output_signal(rec))
1859                 trigger_hit(&switch_output_trigger);
1860 }
1861
1862 static void alarm_sig_handler(int sig __maybe_unused)
1863 {
1864         struct record *rec = &record;
1865
1866         if (switch_output_time(rec))
1867                 trigger_hit(&switch_output_trigger);
1868 }
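
/*
 * A rough sketch of the trigger lifecycle the two handlers above rely on
 * (an assumption based on the util/trigger.h API used in this file):
 *
 *   trigger_ready(&t);            // main loop: arm the trigger
 *   ...
 *   if (trigger_is_ready(&t))     // signal handler: fire only when armed
 *           trigger_hit(&t);
 *   ...
 *   if (trigger_is_hit(&t)) {     // main loop: consume the event,
 *           switch_or_snapshot(); // do the work (hypothetical helper),
 *           trigger_ready(&t);    // then re-arm for the next signal
 *   }
 */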