/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>
#include <linux/time64.h>

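/*
 * State for the --switch-output feature: rotate the output file when a
 * SIGUSR2 arrives ("signal"), when a given amount of data has been
 * written ("size"), or after a given time period ("time").  The raw
 * option string is kept in 'str' and parsed by switch_output_setup().
 */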
struct switch_output {
        bool             enabled;
        bool             signal;
        unsigned long    size;
        unsigned long    time;
        const char      *str;
        bool             set;
};

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        struct switch_output    switch_output;
        unsigned long long      samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

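/*
 * All output ends up here: account the bytes written so that the
 * size-based switch-output threshold can fire as soon as enough data
 * has been dumped to the file.
 */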
static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

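/*
 * In a backward (overwritable) ring buffer the record at 'head' is the
 * most recently written one, with older records following at higher
 * offsets.  Walk the event headers forward from 'head' until we either
 * cover a full buffer size (rewinding if the last step overshot) or hit
 * a zero-sized header marking unused space; that offset becomes 'end',
 * giving the valid [start, end) region to copy out.
 */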
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading backward ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading backward ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
              u64 *start, u64 *end, bool backward)
{
        if (!backward) {
                *start = old;
                *end = head;
                return 0;
        }

        return backward_rb_find_range(data, mask, head, start, end);
}

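/*
 * Copy the data accumulated in one mmap'ed ring buffer to the output
 * file.  When the [start, end) region wraps past the end of the buffer
 * it is written in two pieces: first from 'start' to the top of the
 * buffer, then from the bottom of the buffer up to 'end'.
 */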
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
                  bool overwrite, bool backward)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (rb_find_range(data, md->mask, head,
                          old, &start, &end, backward))
                return -1;

        if (start == end)
                return 0;

        rec->samples++;

        size = end - start;
        if (size > (unsigned long)(md->mask) + 1) {
                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                md->prev = head;
                perf_mmap__consume(md, overwrite || backward);
                return 0;
        }

        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
                start += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[start & md->mask];
        size = end - start;
        start += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_mmap__consume(md, overwrite || backward);
out:
        return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
                int fd = perf_data_file__fd(file);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

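/*
 * Open all the counters in the evlist, falling back to a more widely
 * supported event configuration when the requested one is rejected,
 * then apply event filters and PMU driver configs and mmap the ring
 * buffers.
 */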
static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        struct perf_evsel_config_term *err_term;
        int rc = 0;

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
                error("failed to set config \"%s\" on event %s with %d (%s)\n",
                      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
                      str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        rec->samples++;

        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;

        if (file->size == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with the real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples.
         */
        if (rec->buildid_all)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for the guest kernel, when processing the record & report
         * subcommands we arrange the module mmaps prior to the guest
         * kernel mmap and trigger a preload of the dso, because by
         * default guest module symbols are loaded from the guest
         * kallsyms instead of /lib/modules/XXX/XXX.  This avoids missing
         * symbols when the first address is in a module instead of in
         * the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

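/*
 * A PERF_RECORD_FINISHED_ROUND event is appended after each pass over
 * all the ring buffers.  It tells the session layer that everything
 * written before it has been flushed, bounding how far events can be
 * reordered and letting them be sorted in limited batches.
 */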
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool backward)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;

        if (!evlist)
                return 0;

        maps = backward ? evlist->backward_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

                if (maps[i].base) {
                        if (record__mmap_read(rec, &maps[i],
                                              evlist->overwrite, backward) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

        if (backward)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        int fd = perf_data_file__fd(file);

        if (file->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address,
                                                 rec->opts.proc_map_timeout);
        thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data_file *file = &rec->file;
        int fd, err;

        /* Same size as "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data_file__switch(file, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        file->path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in the evlist, which causes the newly created perf.data
                 * to contain no map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for that by setting
 * its want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

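/*
 * Weak default: architectures that can convert hardware timestamps
 * (e.g. x86 with its TSC) override this to synthesize a time conversion
 * event; everywhere else it is a no-op.
 */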
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->backward_mmap && evlist->backward_mmap[0].base)
                        return evlist->backward_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

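/*
 * Emit the synthetic (non-sample) events that describe the system state
 * to the output: event attributes and tracing data when piping, time
 * conversion and auxtrace info, kernel and module mmaps, guest
 * machines, and the maps/comms of already-running target threads.
 */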
static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data_file *file = &rec->file;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data_file__fd(file);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout);
out:
        return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
        signal(SIGSEGV, sigsegv_handler);

        if (rec->opts.record_namespaces)
                tool->namespace_events = true;

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output.enabled)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(file, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data_file__fd(file);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (file->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;
                pid_t tgid;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before the COMM event,
                 * which is emitted during exec(), so perf script cannot
                 * see a correct process name for those events.
                 * Synthesize a COMM event to prevent that.
                 */
                tgid = perf_event__synthesize_comm(tool, event,
                                                   rec->evlist->workload.pid,
                                                   process_synthesized_event,
                                                   machine);
                free(event);

                if (tgid == -1)
                        goto out_child;

                event = malloc(sizeof(event->namespaces) +
                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                               machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Synthesize a NAMESPACES event for the command specified.
                 */
                perf_event__synthesize_namespaces(tool, event,
                                                  rec->evlist->workload.pid,
                                                  tgid, process_synthesized_event,
                                                  machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        perf_hooks__invoke_record_start();
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
                 * when done == true and hits != rec->samples in the
                 * previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap() ensures we never
                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in
                         * the overwritable ring buffer should have been
                         * collected, so bkw_mmap_state should be set to
                         * BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 was raised after or during
                         * record__mmap_read_all(), it didn't collect data
                         * from the overwritable ring buffer.  Read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Re-enable events in the overwrite ring buffer
                         * after record__mmap_read_all(): we should have
                         * collected data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }

                        /* re-arm the alarm */
                        if (rec->switch_output.time)
                                alarm(rec->switch_output.time);
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate error, only if there's any. Ignore positive
                         * number of returned events and interrupt error.
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, at the end events
                 * die with the process and we wait for that. Thus no need to
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        perf_hooks__invoke_record_end();

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
                        file->path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

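/*
 * Parse the --call-graph option, e.g. "fp", "lbr" or "dwarf,8192"
 * (mode plus an optional stack dump size).  DWARF mode additionally
 * turns on data address sampling, as noted below.
 */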
int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

#define CLOCKID_END     { .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

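/*
 * Parse the clockid option: accepts a raw clockid number or any name
 * from the table above, with an optional "CLOCK_" prefix, e.g.
 * "mono" or "CLOCK_MONOTONIC_RAW".
 */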
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}

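/*
 * Parse the -m/--mmap-pages option: "pages[,pages]", where the value
 * before the comma sizes the data mmaps and the one after it sizes the
 * AUX area tracing mmaps, e.g. "-m 512,128".
 */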
static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}

static void switch_output_size_warn(struct record *rec)
{
        u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;

        wakeup_size /= 2;

        if (s->size < wakeup_size) {
                char buf[100];

                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
                pr_warning("WARNING: switch-output data size is lower than "
                           "the wakeup kernel buffer size (%s); "
                           "expect bigger perf.data sizes\n", buf);
        }
}

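/*
 * Parse the --switch-output option string.  "signal" rotates on SIGUSR2
 * only; a size tag such as "10M" rotates once that much data has been
 * written; a time tag such as "30s" rotates on a periodic alarm.
 * Either way, output files get a timestamp suffix.
 */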
1434 static int switch_output_setup(struct record *rec)
1435 {
1436         struct switch_output *s = &rec->switch_output;
1437         static struct parse_tag tags_size[] = {
1438                 { .tag  = 'B', .mult = 1       },
1439                 { .tag  = 'K', .mult = 1 << 10 },
1440                 { .tag  = 'M', .mult = 1 << 20 },
1441                 { .tag  = 'G', .mult = 1 << 30 },
1442                 { .tag  = 0 },
1443         };
1444         static struct parse_tag tags_time[] = {
1445                 { .tag  = 's', .mult = 1        },
1446                 { .tag  = 'm', .mult = 60       },
1447                 { .tag  = 'h', .mult = 60*60    },
1448                 { .tag  = 'd', .mult = 60*60*24 },
1449                 { .tag  = 0 },
1450         };
1451         unsigned long val;
1452
1453         if (!s->set)
1454                 return 0;
1455
1456         if (!strcmp(s->str, "signal")) {
1457                 s->signal = true;
1458                 pr_debug("switch-output with SIGUSR2 signal\n");
1459                 goto enabled;
1460         }
1461
1462         val = parse_tag_value(s->str, tags_size);
1463         if (val != (unsigned long) -1) {
1464                 s->size = val;
1465                 pr_debug("switch-output with %s size threshold\n", s->str);
1466                 goto enabled;
1467         }
1468
1469         val = parse_tag_value(s->str, tags_time);
1470         if (val != (unsigned long) -1) {
1471                 s->time = val;
1472                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1473                          s->str, s->time);
1474                 goto enabled;
1475         }
1476
1477         return -1;
1478
1479 enabled:
1480         rec->timestamp_filename = true;
1481         s->enabled              = true;
1482
1483         if (s->size && !rec->opts.no_buffering)
1484                 switch_output_size_warn(rec);
1485
1486         return 0;
1487 }
1488
1489 static const char * const __record_usage[] = {
1490         "perf record [<options>] [<command>]",
1491         "perf record [<options>] -- <command> [<options>]",
1492         NULL
1493 };
1494 const char * const *record_usage = __record_usage;
1495
1496 /*
1497  * XXX Ideally would be local to cmd_record() and passed to a record__new
1498  * because we need to have access to it in record__exit, that is called
1499  * after cmd_record() exits, but since record_options need to be accessible to
1500  * builtin-script, leave it here.
1501  *
1502  * At least we don't ouch it in all the other functions here directly.
1503  *
1504  * Just say no to tons of global variables, sigh.
1505  */
static struct record record = {
        .opts = {
                .sample_time         = true,
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 4000,
                .target              = {
                        .uses_mmap   = true,
                        .default_per_cpu = true,
                },
                .proc_map_timeout    = 500,
        },
        .tool = {
                .sample         = process_sample_event,
                .fork           = perf_event__process_fork,
                .exit           = perf_event__process_exit,
                .comm           = perf_event__process_comm,
                .namespaces     = perf_event__process_namespaces,
                .mmap           = perf_event__process_mmap,
                .mmap2          = perf_event__process_mmap2,
                .ordered_events = true,
        },
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
        "\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to using the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
        OPT_CALLBACK('e', "event", &record.evlist, "event",
                     "event selector. Use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
                           NULL, "don't record events from perf itself",
                           exclude_perf),
        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
                    "record events on existing process id"),
        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
        OPT_STRING('o', "output", &record.file.path, "file",
                    "output file name"),
        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
                        &record.opts.no_inherit_set,
                        "child tasks do not inherit counters"),
        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
                    "synthesize non-sample events at the end of output"),
        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
                     "number of mmap data pages and AUX area tracing mmap pages",
                     record__parse_mmap_pages),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
                           NULL, "enables call-graph recording",
                           &record_callchain_opt),
        OPT_CALLBACK(0, "call-graph", &record.opts,
                     "record_mode[,record_size]", record_callchain_help,
                     &record_parse_callchain_opt),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
                        &record.opts.sample_time_set,
                        "Record the sample timestamps"),
        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
                        &record.no_buildid_cache_set,
                        "do not update the buildid cache"),
        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
                        &record.no_buildid_set,
                        "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
                     "ms to wait before starting measurement after program start"),
        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
                   "user to profile"),

        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                     "branch any", "sample any taken branches",
                     parse_branch_stack),

        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
                    "sample by weight (on special events only)"),
        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
                    "sample transaction flags (special events only)"),
        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
                    "use per-thread mmaps"),
        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
                    "sample selected machine registers on interrupt,"
                    " use -I ? to list register names", parse_regs),
        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
                    "Record running/enabled time of read (:S) events"),
        OPT_CALLBACK('k', "clockid", &record.opts,
                     "clockid", "clockid to use for events, see clock_gettime()",
                     parse_clockid),
        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
                          "opts", "AUX area tracing Snapshot Mode", ""),
        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
                    "Record namespaces events"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
                         "Configure all used events to run in kernel space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
                         "Configure all used events to run in user space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
                   "clang binary to use for compiling BPF scriptlets"),
        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
                   "options passed to clang when compiling BPF scriptlets"),
        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
                    "Record build-id of all DSOs regardless of hits"),
        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
                    "append timestamp to output filename"),
        OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
                          &record.switch_output.set, "signal,size,time",
                          "Switch output when receiving SIGUSR2 or when crossing a size or time threshold",
                          "signal"),
        OPT_BOOLEAN(0, "dry-run", &dry_run,
                    "Parse options then exit"),
        OPT_END()
};
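
/*
 * Typical invocations combining the options above, e.g.:
 *
 *   perf record -F 99 -a -g -- sleep 10   # system wide, 99 Hz, call graphs
 *   perf record -e cycles -p 1234         # sample an existing process
 *   perf record --switch-output=30s -a    # rotate perf.data every 30 seconds
 */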

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
        int err;
        struct record *rec = &record;
        char errbuf[BUFSIZ];

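        /*
         * If perf was built without libbpf, or (below) without BPF
         * prologue support, mark the affected options as not built in,
         * so that using them points at the missing build flag instead
         * of failing in an obscure way.
         */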
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
        set_nobuild('\0', "clang-path", true);
        set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
        set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

        rec->evlist = perf_evlist__new();
        if (rec->evlist == NULL)
                return -ENOMEM;

        err = perf_config(perf_record_config, rec);
        if (err)
                return err;

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (quiet)
                perf_quiet_option();

        /* Make system wide (-a) the default target. */
        if (!argc && target__none(&rec->opts.target))
                rec->opts.target.system_wide = true;

        if (nr_cgroups && !rec->opts.target.system_wide) {
                usage_with_options_msg(record_usage, record_options,
                        "cgroup monitoring only available in system-wide mode");
        }

        if (rec->opts.record_switch_events &&
            !perf_can_record_switch_events()) {
                ui__error("kernel does not support recording context switch events\n");
                parse_options_usage(record_usage, record_options, "switch-events", 0);
                return -EINVAL;
        }

        if (switch_output_setup(rec)) {
                parse_options_usage(record_usage, record_options, "switch-output", 0);
                return -EINVAL;
        }

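        /*
         * Time based rotation rides on SIGALRM: arm a one-shot alarm
         * here and let alarm_sig_handler() trip the switch-output
         * trigger; the alarm is expected to be re-armed after each
         * switch.
         */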
        if (rec->switch_output.time) {
                signal(SIGALRM, alarm_sig_handler);
                alarm(rec->switch_output.time);
        }

        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
                        goto out;
        }

        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                              rec->opts.auxtrace_snapshot_opts);
        if (err)
                goto out;

        /*
         * Allow aliases to facilitate the lookup of symbols for address
         * filters. Refer to auxtrace_parse_filters().
         */
        symbol_conf.allow_aliases = true;

        symbol__init(NULL);

        err = auxtrace_parse_filters(rec->evlist);
        if (err)
                goto out;

        if (dry_run)
                goto out;

        err = bpf__setup_stdout(rec->evlist);
        if (err) {
                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Setting up BPF stdout failed: %s\n",
                       errbuf);
                goto out;
        }

        err = -ENOMEM;

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
        } else if (rec->switch_output.enabled) {
                /*
                 * In 'perf record --switch-output', disable build-id
                 * generation by default to reduce the data file switching
                 * overhead. Still generate build-ids if they are explicitly
                 * requested, using:
                 *
                 *  perf record --switch-output --no-no-buildid \
                 *              --no-no-buildid-cache
                 *
                 * The following code is equivalent to:
                 *
                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
                 *         disable_buildid_cache();
                 */
                bool disable = true;

                if (rec->no_buildid_set && !rec->no_buildid)
                        disable = false;
                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
                        disable = false;
                if (disable) {
                        rec->no_buildid = true;
                        rec->no_buildid_cache = true;
                        disable_buildid_cache();
                }
        }
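
        /*
         * In overwrite mode the ring buffer keeps only the most recent
         * events, so side-band events (mmap, comm, ...) emitted early in
         * the run may already be gone when the buffer is dumped;
         * synthesizing them again at the end of the output keeps the
         * samples resolvable.
         */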
        if (record.opts.overwrite)
                record.opts.tail_synthesize = true;

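        /*
         * With no events specified on the command line, fall back to
         * the default event, i.e. the hardware cycles counter.
         */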
        if (rec->evlist->nr_entries == 0 &&
            perf_evlist__add_default(rec->evlist) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out;
        }

        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;

        err = target__validate(&rec->opts.target);
        if (err) {
                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s", errbuf);
        }

        err = target__parse_uid(&rec->opts.target);
        if (err) {
                int saved_errno = errno;

                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out;
        }

        /*
         * Enable ignoring missing threads when the -u option is given:
         * threads selected by uid can exit between the time we scan
         * /proc and the time we open counters on them, and that must
         * not abort the whole record session.
         */
        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;

        err = -ENOMEM;
        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
        if (err)
                goto out;

        /*
         * We take all buildids when the file contains AUX area tracing
         * data, because we do not decode the trace: decoding it would
         * simply take too long.
         */
        if (rec->opts.full_auxtrace)
                rec->buildid_all = true;

        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out;
        }

        err = __cmd_record(&record, argc, argv);
out:
        perf_evlist__delete(rec->evlist);
        symbol__exit();
        auxtrace_record__free(rec->itr);
        return err;
}

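/*
 * SIGUSR2 does double duty here: when AUX area snapshot mode is active
 * it captures a snapshot of the trace, and when --switch-output=signal
 * is in effect it trips the switch-output trigger. The handler only
 * sets triggers; the real work is done back in the record loop.
 */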
static void snapshot_sig_handler(int sig __maybe_unused)
{
        struct record *rec = &record;

        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
                trigger_hit(&auxtrace_snapshot_trigger);
                auxtrace_record__snapshot_started = 1;
                if (auxtrace_record__snapshot_start(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
        }

        if (switch_output_signal(rec))
                trigger_hit(&switch_output_trigger);
}

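/*
 * SIGALRM counterpart for --switch-output=<time>: as with the SIGUSR2
 * handler above, only trip the trigger and let the main loop perform
 * the actual file switch, keeping the handler async-signal-safe.
 */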
static void alarm_sig_handler(int sig __maybe_unused)
{
        struct record *rec = &record;

        if (switch_output_time(rec))
                trigger_hit(&switch_output_trigger);
}