]> git.karo-electronics.de Git - karo-tx-linux.git/blob - tools/perf/builtin-record.c
Merge remote-tracking branch 'usb/usb-next'
[karo-tx-linux.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29 #include "util/data.h"
30 #include "util/perf_regs.h"
31 #include "util/auxtrace.h"
32 #include "util/parse-branch-options.h"
33 #include "util/parse-regs-options.h"
34 #include "util/llvm-utils.h"
35
36 #include <unistd.h>
37 #include <sched.h>
38 #include <sys/mman.h>
39
40
/*
 * State for a single "perf record" run: the output file, the events
 * being recorded, the session, and flags parsed from the command line.
 */
struct record {
	struct perf_tool	tool;		/* event-processing callbacks */
	struct record_opts	opts;		/* parsed command-line options */
	u64			bytes_written;	/* payload bytes written to the output */
	struct perf_data_file	file;		/* perf.data output file */
	struct auxtrace_record	*itr;		/* AUX area tracing state (may be NULL) */
	struct perf_evlist	*evlist;	/* events to record */
	struct perf_session	*session;
	const char		*progname;	/* argv[0], kept for diagnostics */
	int			realtime_prio;	/* non-zero: switch to SCHED_FIFO at this prio */
	bool			no_buildid;	/* skip build-id processing entirely */
	bool			no_buildid_set;	/* no_buildid came from the command line */
	bool			no_buildid_cache;	/* don't add build-ids to the cache */
	bool			no_buildid_cache_set;	/* ditto, explicitly set */
	bool			buildid_all;	/* mark all DSOs, not only sample hits */
	unsigned long long	samples;	/* samples seen during this run */
};
58
59 static int record__write(struct record *rec, void *bf, size_t size)
60 {
61         if (perf_data_file__write(rec->session->file, bf, size) < 0) {
62                 pr_err("failed to write perf data, error: %m\n");
63                 return -1;
64         }
65
66         rec->bytes_written += size;
67         return 0;
68 }
69
70 static int process_synthesized_event(struct perf_tool *tool,
71                                      union perf_event *event,
72                                      struct perf_sample *sample __maybe_unused,
73                                      struct machine *machine __maybe_unused)
74 {
75         struct record *rec = container_of(tool, struct record, tool);
76         return record__write(rec, event, event->header.size);
77 }
78
/*
 * Copy all new data from one event ring buffer (mmap index @idx) to the
 * output file.  The readable region is [old, head); if it wraps past
 * the end of the ring it is written in two chunks.  Returns 0 on
 * success, -1 on write failure (in which case md->prev is NOT advanced,
 * so the data will be retried).
 */
static int record__mmap_read(struct record *rec, int idx)
{
	struct perf_mmap *md = &rec->evlist->mmap[idx];
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;	/* skip the control page */
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;	/* nothing new */

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		/* Region wraps: write from 'old' to the end of the ring first. */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* Remaining (or entire, if no wrap) region up to 'head'. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_evlist__mmap_consume(rec->evlist, idx);
out:
	return rc;
}
121
/* State shared with the signal handlers below (hence volatile). */
static volatile int done;			/* set when the record loop should stop */
static volatile int signr = -1;			/* fatal signal to re-raise at exit, -1 if none */
static volatile int child_finished;		/* workload child exited (SIGCHLD seen) */
static volatile int auxtrace_snapshot_enabled;	/* SIGUSR2 may start a snapshot right now */
static volatile int auxtrace_snapshot_err;	/* sticky error from the last snapshot */
static volatile int auxtrace_record__snapshot_started;	/* snapshot in progress, read it in the loop */
128
129 static void sig_handler(int sig)
130 {
131         if (sig == SIGCHLD)
132                 child_finished = 1;
133         else
134                 signr = sig;
135
136         done = 1;
137 }
138
139 static void record__sig_exit(void)
140 {
141         if (signr == -1)
142                 return;
143
144         signal(signr, SIG_DFL);
145         raise(signr);
146 }
147
148 #ifdef HAVE_AUXTRACE_SUPPORT
149
150 static int record__process_auxtrace(struct perf_tool *tool,
151                                     union perf_event *event, void *data1,
152                                     size_t len1, void *data2, size_t len2)
153 {
154         struct record *rec = container_of(tool, struct record, tool);
155         struct perf_data_file *file = &rec->file;
156         size_t padding;
157         u8 pad[8] = {0};
158
159         if (!perf_data_file__is_pipe(file)) {
160                 off_t file_offset;
161                 int fd = perf_data_file__fd(file);
162                 int err;
163
164                 file_offset = lseek(fd, 0, SEEK_CUR);
165                 if (file_offset == -1)
166                         return -1;
167                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
168                                                      event, file_offset);
169                 if (err)
170                         return err;
171         }
172
173         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
174         padding = (len1 + len2) & 7;
175         if (padding)
176                 padding = 8 - padding;
177
178         record__write(rec, event, event->header.size);
179         record__write(rec, data1, len1);
180         if (len2)
181                 record__write(rec, data2, len2);
182         record__write(rec, &pad, padding);
183
184         return 0;
185 }
186
187 static int record__auxtrace_mmap_read(struct record *rec,
188                                       struct auxtrace_mmap *mm)
189 {
190         int ret;
191
192         ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
193                                   record__process_auxtrace);
194         if (ret < 0)
195                 return ret;
196
197         if (ret)
198                 rec->samples++;
199
200         return 0;
201 }
202
203 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
204                                                struct auxtrace_mmap *mm)
205 {
206         int ret;
207
208         ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
209                                            record__process_auxtrace,
210                                            rec->opts.auxtrace_snapshot_size);
211         if (ret < 0)
212                 return ret;
213
214         if (ret)
215                 rec->samples++;
216
217         return 0;
218 }
219
220 static int record__auxtrace_read_snapshot_all(struct record *rec)
221 {
222         int i;
223         int rc = 0;
224
225         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
226                 struct auxtrace_mmap *mm =
227                                 &rec->evlist->mmap[i].auxtrace_mmap;
228
229                 if (!mm->base)
230                         continue;
231
232                 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
233                         rc = -1;
234                         goto out;
235                 }
236         }
237 out:
238         return rc;
239 }
240
241 static void record__read_auxtrace_snapshot(struct record *rec)
242 {
243         pr_debug("Recording AUX area tracing snapshot\n");
244         if (record__auxtrace_read_snapshot_all(rec) < 0) {
245                 auxtrace_snapshot_err = -1;
246         } else {
247                 auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
248                 if (!auxtrace_snapshot_err)
249                         auxtrace_snapshot_enabled = 1;
250         }
251 }
252
#else

/* No AUX area tracing support compiled in: all operations are no-ops. */

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif
274
/*
 * Configure, open and mmap all events in the record's evlist, apply any
 * event filters, and attach the evlist to the session.  Returns 0 on
 * success or a negative error code (ui/error messages already printed).
 */
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			/* Retry if a fallback (e.g. to a software event) applies. */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	/* On failure @pos is set to the event whose filter was rejected. */
	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			strerror_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				strerror_r(errno, msg, sizeof(msg)));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
335
336 static int process_sample_event(struct perf_tool *tool,
337                                 union perf_event *event,
338                                 struct perf_sample *sample,
339                                 struct perf_evsel *evsel,
340                                 struct machine *machine)
341 {
342         struct record *rec = container_of(tool, struct record, tool);
343
344         rec->samples++;
345
346         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
347 }
348
/*
 * Re-read the just-recorded file to collect build-ids for the DSOs that
 * were hit.  Returns 0 for an empty file, otherwise the result of
 * perf_session__process_events().
 */
static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}
377
378 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
379 {
380         int err;
381         struct perf_tool *tool = data;
382         /*
383          *As for guest kernel when processing subcommand record&report,
384          *we arrange module mmap prior to guest kernel mmap and trigger
385          *a preload dso because default guest module symbols are loaded
386          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
387          *method is used to avoid symbol missing when the first addr is
388          *in module instead of in guest kernel.
389          */
390         err = perf_event__synthesize_modules(tool, process_synthesized_event,
391                                              machine);
392         if (err < 0)
393                 pr_err("Couldn't record guest kernel [%d]'s reference"
394                        " relocation symbol.\n", machine->pid);
395
396         /*
397          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
398          * have no _text sometimes.
399          */
400         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
401                                                  machine);
402         if (err < 0)
403                 pr_err("Couldn't record guest kernel [%d]'s reference"
404                        " relocation symbol.\n", machine->pid);
405 }
406
/*
 * PERF_RECORD_FINISHED_ROUND marker, appended after each full pass over
 * the mmaps so ordered event processing knows it may flush.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
411
412 static int record__mmap_read_all(struct record *rec)
413 {
414         u64 bytes_written = rec->bytes_written;
415         int i;
416         int rc = 0;
417
418         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
419                 struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
420
421                 if (rec->evlist->mmap[i].base) {
422                         if (record__mmap_read(rec, i) != 0) {
423                                 rc = -1;
424                                 goto out;
425                         }
426                 }
427
428                 if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
429                     record__auxtrace_mmap_read(rec, mm) != 0) {
430                         rc = -1;
431                         goto out;
432                 }
433         }
434
435         /*
436          * Mark the round finished in case we wrote
437          * at least one event.
438          */
439         if (bytes_written != rec->bytes_written)
440                 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
441
442 out:
443         return rc;
444 }
445
446 static void record__init_features(struct record *rec)
447 {
448         struct perf_session *session = rec->session;
449         int feat;
450
451         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
452                 perf_header__set_feat(&session->header, feat);
453
454         if (rec->no_buildid)
455                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
456
457         if (!have_tracepoints(&rec->evlist->entries))
458                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
459
460         if (!rec->opts.branch_stack)
461                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
462
463         if (!rec->opts.full_auxtrace)
464                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
465
466         perf_header__clear_feat(&session->header, HEADER_STAT);
467 }
468
/* errno from a failed exec in the workload child, 0 if none. */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* The child's exec errno arrives in the signal's value payload. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
484
485 static void snapshot_sig_handler(int sig);
486
/*
 * The main record flow: create the session, synthesize the initial
 * metadata events (attrs, kernel/module maps, existing threads), start
 * the workload if one was given, then drain the mmaps until done and
 * finalize the output file.  Returns the command's exit status.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;	/* a workload command line was given */
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	/* SIGUSR2 triggers AUX snapshots only when snapshot mode is on. */
	if (rec->opts.auxtrace_snapshot_mode)
		signal(SIGUSR2, snapshot_sig_handler);
	else
		signal(SIGUSR2, SIG_IGN);

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	/* Fork (but don't yet exec) the workload before opening events. */
	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	/* Pipe output carries no header, so synthesize attrs/tracing data. */
	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_child;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_child;
			}
			rec->bytes_written += err;
		}
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out_delete_session;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
	if (err != 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

	/* Main loop: drain mmaps until the workload/user says stop. */
	auxtrace_snapshot_enabled = 1;
	for (;;) {
		unsigned long long hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			auxtrace_snapshot_enabled = 0;
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!auxtrace_snapshot_err)
				record__read_auxtrace_snapshot(rec);
			if (auxtrace_snapshot_err) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		/* No new samples this pass: sleep until the fds wake us. */
		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			auxtrace_snapshot_enabled = 0;
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	auxtrace_snapshot_enabled = 0;

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	/* Finalize the file: sizes, build-ids, and the final header. */
	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;
		file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

		if (!rec->no_buildid) {
			process_buildids(rec);

			if (rec->buildid_all)
				dsos__hit_all(rec->session);
		}
		perf_session__write_header(rec->session, rec->evlist, fd, true);
	}

	if (!err && !quiet) {
		char samples[128];

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}
794
795 static void callchain_debug(void)
796 {
797         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
798
799         pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
800
801         if (callchain_param.record_mode == CALLCHAIN_DWARF)
802                 pr_debug("callchain: stack dump size %d\n",
803                          callchain_param.dump_size);
804 }
805
806 int record_parse_callchain_opt(const struct option *opt,
807                                const char *arg,
808                                int unset)
809 {
810         int ret;
811         struct record_opts *record = (struct record_opts *)opt->value;
812
813         record->callgraph_set = true;
814         callchain_param.enabled = !unset;
815
816         /* --no-call-graph */
817         if (unset) {
818                 callchain_param.record_mode = CALLCHAIN_NONE;
819                 pr_debug("callchain: disabled\n");
820                 return 0;
821         }
822
823         ret = parse_callchain_record_opt(arg, &callchain_param);
824         if (!ret) {
825                 /* Enable data address sampling for DWARF unwind. */
826                 if (callchain_param.record_mode == CALLCHAIN_DWARF)
827                         record->sample_address = true;
828                 callchain_debug();
829         }
830
831         return ret;
832 }
833
834 int record_callchain_opt(const struct option *opt,
835                          const char *arg __maybe_unused,
836                          int unset __maybe_unused)
837 {
838         struct record_opts *record = (struct record_opts *)opt->value;
839
840         record->callgraph_set = true;
841         callchain_param.enabled = true;
842
843         if (callchain_param.record_mode == CALLCHAIN_NONE)
844                 callchain_param.record_mode = CALLCHAIN_FP;
845
846         callchain_debug();
847         return 0;
848 }
849
850 static int perf_record_config(const char *var, const char *value, void *cb)
851 {
852         struct record *rec = cb;
853
854         if (!strcmp(var, "record.build-id")) {
855                 if (!strcmp(value, "cache"))
856                         rec->no_buildid_cache = false;
857                 else if (!strcmp(value, "no-cache"))
858                         rec->no_buildid_cache = true;
859                 else if (!strcmp(value, "skip"))
860                         rec->no_buildid = true;
861                 else
862                         return -1;
863                 return 0;
864         }
865         if (!strcmp(var, "record.call-graph"))
866                 var = "call-graph.record-mode"; /* fall-through */
867
868         return perf_default_config(var, value, cb);
869 }
870
/* Maps a user-supplied clock name to its clockid for the -k/--clockid option. */
struct clockid_map {
	const char *name;
	int clockid;
};

/* Entry initializer for the clockids[] table below. */
#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* NULL-name sentinel terminating the clockids[] table. */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 * (Numeric fallbacks for libc headers that predate these clocks;
 * presumably the Linux UAPI values -- confirm against linux/time.h.)
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif
894
/* Clock-name lookup table consulted by parse_clockid(); CLOCKID_END-terminated. */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
913
914 static int parse_clockid(const struct option *opt, const char *str, int unset)
915 {
916         struct record_opts *opts = (struct record_opts *)opt->value;
917         const struct clockid_map *cm;
918         const char *ostr = str;
919
920         if (unset) {
921                 opts->use_clockid = 0;
922                 return 0;
923         }
924
925         /* no arg passed */
926         if (!str)
927                 return 0;
928
929         /* no setting it twice */
930         if (opts->use_clockid)
931                 return -1;
932
933         opts->use_clockid = true;
934
935         /* if its a number, we're done */
936         if (sscanf(str, "%d", &opts->clockid) == 1)
937                 return 0;
938
939         /* allow a "CLOCK_" prefix to the name */
940         if (!strncasecmp(str, "CLOCK_", 6))
941                 str += 6;
942
943         for (cm = clockids; cm->name; cm++) {
944                 if (!strcasecmp(str, cm->name)) {
945                         opts->clockid = cm->clockid;
946                         return 0;
947                 }
948         }
949
950         opts->use_clockid = false;
951         ui__warning("unknown clockid %s, check man page\n", ostr);
952         return -1;
953 }
954
955 static int record__parse_mmap_pages(const struct option *opt,
956                                     const char *str,
957                                     int unset __maybe_unused)
958 {
959         struct record_opts *opts = opt->value;
960         char *s, *p;
961         unsigned int mmap_pages;
962         int ret;
963
964         if (!str)
965                 return -EINVAL;
966
967         s = strdup(str);
968         if (!s)
969                 return -ENOMEM;
970
971         p = strchr(s, ',');
972         if (p)
973                 *p = '\0';
974
975         if (*s) {
976                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
977                 if (ret)
978                         goto out_free;
979                 opts->mmap_pages = mmap_pages;
980         }
981
982         if (!p) {
983                 ret = 0;
984                 goto out_free;
985         }
986
987         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
988         if (ret)
989                 goto out_free;
990
991         opts->auxtrace_mmap_pages = mmap_pages;
992
993 out_free:
994         free(s);
995         return ret;
996 }
997
/* Command-line synopses printed by "perf record -h" and on usage errors. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* External linkage: may be referenced from other builtins -- see XXX below. */
const char * const *record_usage = __record_usage;
1004
1005 /*
1006  * XXX Ideally would be local to cmd_record() and passed to a record__new
1007  * because we need to have access to it in record__exit, that is called
1008  * after cmd_record() exits, but since record_options need to be accessible to
1009  * builtin-script, leave it here.
1010  *
 * At least we don't touch it in all the other functions here directly.
1012  *
1013  * Just say no to tons of global variables, sigh.
1014  */
/*
 * The one global record session state: default option values plus the
 * perf_tool callbacks used when processing events.
 */
static struct record record = {
	.opts = {
		.sample_time         = true,
		/*
		 * UINT_MAX/ULLONG_MAX look like "not set by the user"
		 * sentinels overridden by -m/-F/-c -- TODO confirm where
		 * they are tested.
		 */
		.mmap_pages          = UINT_MAX,
		.user_freq           = UINT_MAX,
		.user_interval       = ULLONG_MAX,
		.freq                = 4000,	/* default sampling frequency (Hz) */
		.target              = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,	/* ms, matches --proc-map-timeout default */
	},
	.tool = {
		.sample         = process_sample_event,
		.fork           = perf_event__process_fork,
		.exit           = perf_event__process_exit,
		.comm           = perf_event__process_comm,
		.mmap           = perf_event__process_mmap,
		.mmap2          = perf_event__process_mmap2,
		.ordered_events = true,
	},
};
1038
/* Help text for --call-graph: the generic help string plus our default mode. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";
1041
1042 /*
1043  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1044  * with it and switch to use the library functions in perf_evlist that came
1045  * from builtin-record.c, i.e. use record_opts,
1046  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1047  * using pipes, etc.
1048  */
/*
 * Option table for 'perf record'.  Each entry binds a command-line switch
 * to a field of the global 'record' (or to a parser callback defined
 * above); the help strings double as the per-option documentation.
 */
struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_END()
};
1153
1154 struct option *record_options = __record_options;
1155
/*
 * Entry point for 'perf record': parse options, validate the target and
 * event list, configure AUX area tracing, then hand off to __cmd_record()
 * to do the actual recording.  Returns 0 on success or a negative errno.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	/* Disable BPF-related options when the support wasn't built in. */
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	/* Apply perfconfig "record.*" settings before the command line. */
	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* No workload and no target at all: nothing to record. */
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	/* Set up AUX area tracing (e.g. hardware trace), if available. */
	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = -ENOMEM;

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event. */
	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* -t without an explicit -i/--no-inherit choice implies no inherit. */
	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	/* Target validation problems are only warnings here... */
	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	/* ...but a bad --uid is fatal. */
	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out_symbol_exit;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	/* Common cleanup for both success and failure paths. */
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}
1288
1289 static void snapshot_sig_handler(int sig __maybe_unused)
1290 {
1291         if (!auxtrace_snapshot_enabled)
1292                 return;
1293         auxtrace_snapshot_enabled = 0;
1294         auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
1295         auxtrace_record__snapshot_started = 1;
1296 }