/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "asm/bug.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>
#include <linux/time64.h>

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			switch_output;
	unsigned long long	samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

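/*
 * Scan a backward (overwrite) ring buffer: starting at 'head', walk the
 * event headers forward until the data wraps around or a zero-sized
 * header marks the end of valid records, and report the resulting
 * [start, end) byte range.
 */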
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
	      u64 *start, u64 *end, bool backward)
{
	if (!backward) {
		*start = old;
		*end = head;
		return 0;
	}

	return backward_rb_find_range(data, mask, head, start, end);
}

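/*
 * Copy the byte range [start, end) out of one mmap'ed ring buffer into
 * the perf.data file.  A range that wraps past the end of the buffer is
 * written in two chunks: the tail of the buffer first, then the head.
 */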
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
		  bool overwrite, bool backward)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(data, md->mask, head,
			  old, &start, &end, backward))
		return -1;

	if (start == end)
		return 0;

	rec->samples++;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_mmap__consume(md, overwrite || backward);
		return 0;
	}

	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md, overwrite || backward);
out:
	return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
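/*
 * The triggers coordinate the SIGUSR2 handler with the main record loop:
 * the handler "hits" a ready trigger, and the loop then takes an AUX area
 * snapshot or switches the output file and re-arms the trigger.
 */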

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data_file *file = &rec->file;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;
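	/* e.g. len1 + len2 == 13 leaves padding == 3, keeping records 8-byte aligned */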

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		error("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer a vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a preload of the DSOs, because by default guest
	 * module symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX.  This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

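/*
 * Drain every mmap'ed ring buffer of the evlist (forward or backward maps,
 * selected by 'backward'), then emit a PERF_RECORD_FINISHED_ROUND marker
 * so that the report side knows it may flush and reorder the events
 * gathered so far.
 */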
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (record__mmap_read(rec, &maps[i],
					      evlist->overwrite, backward) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

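/*
 * Finalize a non-pipe output file: account for the bytes written, process
 * build-ids unless that was disabled, and rewrite the header with the
 * final sizes.
 */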
static void
record__finish_output(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	int fd = perf_data_file__fd(file);

	if (file->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
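	/*
	 * A one-entry thread_map built on the stack: map_data provides the
	 * storage for map.map[0], the flexible array member at the end of
	 * struct thread_map.
	 */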
	struct {
		struct thread_map map;
		struct thread_map_data map_data;
	} thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map.map.nr = 1;
	thread_map.map.map[0].pid = rec->evlist->workload.pid;
	thread_map.map.map[0].comm = NULL;
	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec, bool tail);

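/*
 * Finish the current output file, let perf_data_file__switch() rename it
 * with a timestamp suffix and, unless called at exit, reopen a fresh file
 * so that recording continues.
 */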
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist.  As a result, the newly created perf.data
		 * would contain no map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

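/*
 * Pick any mapped perf_event_mmap_page: record__synthesize() hands it to
 * perf_event__synth_time_conv() so that time conversion information (e.g.
 * TSC parameters on x86) can be written into the output.
 */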
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
			return evlist->backward_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some hardware events are generated before the COMM event,
		 * which is emitted during exec(), so perf script cannot see
		 * the correct process name for those events.
		 * Synthesize a COMM event to prevent this.
		 */
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
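	/*
	 * Main capture loop: drain the mmap ring buffers, service the
	 * auxtrace snapshot and --switch-output triggers, and poll until
	 * the workload exits or recording is interrupted.
	 */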
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never convert
		 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer.  Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Re-enable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one. Ignore a
			 * positive number of returned events and interrupt
			 * errors (EINTR).
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, the events die
		 * with the process at the end and we wait for that. Thus
		 * there is no need to disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

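/*
 * Parse the -k/--clockid option: a raw number ("-k 4"), a name from the
 * table above ("-k monotonic_raw"), or the same name with a CLOCK_ prefix
 * ("-k CLOCK_MONOTONIC_RAW") all select the same clock.
 */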
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

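/*
 * Parse -m/--mmap-pages as "<data pages>[,<AUX pages>]"; for example,
 * "-m 512,128" requests 512 data pages plus 128 AUX area tracing pages.
 */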
1312 static int record__parse_mmap_pages(const struct option *opt,
1313                                     const char *str,
1314                                     int unset __maybe_unused)
1315 {
1316         struct record_opts *opts = opt->value;
1317         char *s, *p;
1318         unsigned int mmap_pages;
1319         int ret;
1320
1321         if (!str)
1322                 return -EINVAL;
1323
1324         s = strdup(str);
1325         if (!s)
1326                 return -ENOMEM;
1327
1328         p = strchr(s, ',');
1329         if (p)
1330                 *p = '\0';
1331
1332         if (*s) {
1333                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1334                 if (ret)
1335                         goto out_free;
1336                 opts->mmap_pages = mmap_pages;
1337         }
1338
1339         if (!p) {
1340                 ret = 0;
1341                 goto out_free;
1342         }
1343
1344         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1345         if (ret)
1346                 goto out_free;
1347
1348         opts->auxtrace_mmap_pages = mmap_pages;
1349
1350 out_free:
1351         free(s);
1352         return ret;
1353 }
1354
1355 static const char * const __record_usage[] = {
1356         "perf record [<options>] [<command>]",
1357         "perf record [<options>] -- <command> [<options>]",
1358         NULL
1359 };
1360 const char * const *record_usage = __record_usage;
1361
1362 /*
1363  * XXX Ideally would be local to cmd_record() and passed to a record__new
1364  * because we need to have access to it in record__exit, that is called
1365  * after cmd_record() exits, but since record_options need to be accessible to
1366  * builtin-script, leave it here.
1367  *
1368  * At least we don't ouch it in all the other functions here directly.
1369  *
1370  * Just say no to tons of global variables, sigh.
1371  */
1372 static struct record record = {
1373         .opts = {
1374                 .sample_time         = true,
1375                 .mmap_pages          = UINT_MAX,
1376                 .user_freq           = UINT_MAX,
1377                 .user_interval       = ULLONG_MAX,
1378                 .freq                = 4000,
1379                 .target              = {
1380                         .uses_mmap   = true,
1381                         .default_per_cpu = true,
1382                 },
1383                 .proc_map_timeout     = 500,
1384         },
1385         .tool = {
1386                 .sample         = process_sample_event,
1387                 .fork           = perf_event__process_fork,
1388                 .exit           = perf_event__process_exit,
1389                 .comm           = perf_event__process_comm,
1390                 .mmap           = perf_event__process_mmap,
1391                 .mmap2          = perf_event__process_mmap2,
1392                 .ordered_events = true,
1393         },
1394 };
1395
1396 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1397         "\n\t\t\t\tDefault: fp";
1398
1399 static bool dry_run;
1400
1401 /*
1402  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1403  * with it and switch to use the library functions in perf_evlist that came
1404  * from builtin-record.c, i.e. use record_opts,
1405  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1406  * using pipes, etc.
1407  */
1408 static struct option __record_options[] = {
1409         OPT_CALLBACK('e', "event", &record.evlist, "event",
1410                      "event selector. use 'perf list' to list available events",
1411                      parse_events_option),
1412         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1413                      "event filter", parse_filter),
1414         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1415                            NULL, "don't record events from perf itself",
1416                            exclude_perf),
1417         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1418                     "record events on existing process id"),
1419         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1420                     "record events on existing thread id"),
1421         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1422                     "collect data with this RT SCHED_FIFO priority"),
1423         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1424                     "collect data without buffering"),
1425         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1426                     "collect raw sample records from all opened counters"),
1427         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1428                             "system-wide collection from all CPUs"),
1429         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1430                     "list of cpus to monitor"),
1431         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1432         OPT_STRING('o', "output", &record.file.path, "file",
1433                     "output file name"),
1434         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1435                         &record.opts.no_inherit_set,
1436                         "child tasks do not inherit counters"),
1437         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1438                     "synthesize non-sample events at the end of output"),
1439         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1440         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1441         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1442                      "number of mmap data pages and AUX area tracing mmap pages",
1443                      record__parse_mmap_pages),
1444         OPT_BOOLEAN(0, "group", &record.opts.group,
1445                     "put the counters into a counter group"),
1446         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1447                            NULL, "enables call-graph recording" ,
1448                            &record_callchain_opt),
1449         OPT_CALLBACK(0, "call-graph", &record.opts,
1450                      "record_mode[,record_size]", record_callchain_help,
1451                      &record_parse_callchain_opt),
1452         OPT_INCR('v', "verbose", &verbose,
1453                     "be more verbose (show counter open errors, etc)"),
1454         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
                        &record.opts.sample_time_set,
                        "Record the sample timestamps"),
        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
                        &record.no_buildid_cache_set,
                        "do not update the buildid cache"),
        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
                        &record.no_buildid_set,
                        "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
                     "ms to wait before starting measurement after program start"),
        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
                   "user to profile"),

        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                     "branch any", "sample any taken branches",
                     parse_branch_stack),

        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
                    "sample by weight (on special events only)"),
        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
                    "sample transaction flags (special events only)"),
        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
                    "use per-thread mmaps"),
        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
                    "sample selected machine registers on interrupt,"
                    " use -I ? to list register names", parse_regs),
        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
                    "Record running/enabled time of read (:S) events"),
        OPT_CALLBACK('k', "clockid", &record.opts,
                     "clockid", "clockid to use for events, see clock_gettime()",
                     parse_clockid),
        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
                          "opts", "AUX area tracing Snapshot Mode", ""),
        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
                         "Configure all used events to run in kernel space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
                         "Configure all used events to run in user space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
                   "clang binary to use for compiling BPF scriptlets"),
        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
                   "options passed to clang when compiling BPF scriptlets"),
        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
                    "Record build-id of all DSOs regardless of hits"),
        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
                    "append timestamp to output filename"),
        OPT_BOOLEAN(0, "switch-output", &record.switch_output,
                    "Switch output when receiving SIGUSR2"),
        OPT_BOOLEAN(0, "dry-run", &dry_run,
                    "Parse options then exit"),
        OPT_END()
};

struct option *record_options = __record_options;

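/*
 * Entry point for 'perf record': parse the options above, validate the
 * target and event list, set up AUX area tracing, then hand control to
 * __cmd_record() for the actual recording session.
 */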
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
        int err;
        struct record *rec = &record;
        char errbuf[BUFSIZ];

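        /*
         * Options whose build-time support is missing stay in the table;
         * set_option_nobuild() marks them so that using one fails with a
         * message naming the feature flag to rebuild with (e.g.
         * NO_LIBBPF=1) instead of a bare unrecognized-option error.
         */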
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
        set_nobuild('\0', "clang-path", true);
        set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
        set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

        rec->evlist = perf_evlist__new();
        if (rec->evlist == NULL)
                return -ENOMEM;

        perf_config(perf_record_config, rec);

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (!argc && target__none(&rec->opts.target))
                usage_with_options(record_usage, record_options);

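        /*
         * Cross-option sanity checks: cgroup monitoring requires
         * system-wide collection (-a), and switch-events requires kernel
         * support for context switch records.
         */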
        if (nr_cgroups && !rec->opts.target.system_wide) {
                usage_with_options_msg(record_usage, record_options,
                        "cgroup monitoring only available in system-wide mode");
        }

        if (rec->opts.record_switch_events &&
            !perf_can_record_switch_events()) {
                ui__error("kernel does not support recording context switch events\n");
                parse_options_usage(record_usage, record_options, "switch-events", 0);
                return -EINVAL;
        }

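        /*
         * --switch-output produces a series of perf.data files, so give
         * each one a timestamped name to keep a newly opened file from
         * overwriting the previous one.
         */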
        if (rec->switch_output)
                rec->timestamp_filename = true;

        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
                        goto out;
        }

        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                              rec->opts.auxtrace_snapshot_opts);
        if (err)
                goto out;

        /*
         * Allow aliases to facilitate the lookup of symbols for address
         * filters. Refer to auxtrace_parse_filters().
         */
        symbol_conf.allow_aliases = true;

        symbol__init(NULL);

        err = auxtrace_parse_filters(rec->evlist);
        if (err)
                goto out;

        if (dry_run)
                goto out;

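        /*
         * Wire up the event that lets compiled BPF scriptlets write
         * their output into the perf ring buffer.
         */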
        err = bpf__setup_stdout(rec->evlist);
        if (err) {
                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Setup BPF stdout failed: %s\n", errbuf);
                goto out;
        }

        err = -ENOMEM;

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
        } else if (rec->switch_output) {
                /*
                 * In 'perf record --switch-output', disable buildid
                 * generation by default to reduce data file switching
                 * overhead. Still generate buildids if they are
                 * explicitly requested, using:
                 *
                 *  perf record --switch-output --no-no-buildid \
                 *              --no-no-buildid-cache
                 *
                 * The following code is equivalent to:
                 *
                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
                 *         disable_buildid_cache();
                 */
                bool disable = true;

                if (rec->no_buildid_set && !rec->no_buildid)
                        disable = false;
                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
                        disable = false;
                if (disable) {
                        rec->no_buildid = true;
                        rec->no_buildid_cache = true;
                        disable_buildid_cache();
                }
        }

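        /*
         * Overwrite mode keeps only the newest samples in the ring
         * buffer, so synthesize the non-sample events (task, mmap, ...)
         * at the end of the session: that way they describe the system
         * state closest to the samples that actually get written out.
         */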
        if (record.opts.overwrite)
                record.opts.tail_synthesize = true;

        if (rec->evlist->nr_entries == 0 &&
            perf_evlist__add_default(rec->evlist) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out;
        }

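        /*
         * When attaching to an existing thread (-t), do not let children
         * inherit the counters unless the user requested inheritance
         * explicitly.
         */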
        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;

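        /*
         * Target validation only warns: conflicting target options are
         * reported, but recording proceeds with what remains usable.
         */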
        err = target__validate(&rec->opts.target);
        if (err) {
                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s", errbuf);
        }

        err = target__parse_uid(&rec->opts.target);
        if (err) {
                int saved_errno = errno;

                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out;
        }

        /* Enable ignoring missing threads when the -u/--uid option is given. */
        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;

        err = -ENOMEM;
        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
        if (err)
                goto out;

        /*
         * We take all buildids when the file contains AUX area tracing
         * data, because we do not decode the trace: decoding it would
         * take too long.
         */
        if (rec->opts.full_auxtrace)
                rec->buildid_all = true;

        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out;
        }

        err = __cmd_record(&record, argc, argv);
out:
        perf_evlist__delete(rec->evlist);
        symbol__exit();
        auxtrace_record__free(rec->itr);
        return err;
}

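/*
 * SIGUSR2 handler shared by AUX area snapshot mode and --switch-output.
 * Each trigger is armed only when its feature is in use, so
 * trigger_is_ready() turns the signal into a no-op otherwise.
 */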
static void snapshot_sig_handler(int sig __maybe_unused)
{
        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
                trigger_hit(&auxtrace_snapshot_trigger);
                auxtrace_record__snapshot_started = 1;
                if (auxtrace_record__snapshot_start(record.itr))
                        trigger_error(&auxtrace_snapshot_trigger);
        }

        if (trigger_is_ready(&switch_output_trigger))
                trigger_hit(&switch_output_trigger);
}