]> git.karo-electronics.de Git - karo-tx-linux.git/blob - tools/perf/builtin-record.c
Merge tag 'spi-v4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
[karo-tx-linux.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29 #include "util/data.h"
30 #include "util/perf_regs.h"
31 #include "util/auxtrace.h"
32 #include "util/parse-branch-options.h"
33 #include "util/parse-regs-options.h"
34 #include "util/llvm-utils.h"
35
36 #include <unistd.h>
37 #include <sched.h>
38 #include <sys/mman.h>
39
40
/*
 * State of one 'perf record' invocation.
 *
 * @tool:             callback table; the callbacks in this file recover the
 *                    enclosing struct record from it with container_of().
 * @opts:             recording options.
 * @bytes_written:    running total of bytes pushed through record__write();
 *                    added to the header's data_size once recording ends.
 * @file:             the output data file.
 * @itr:              AUX area tracing state handed to the auxtrace_* helpers.
 * @evlist:           the events being recorded.
 * @session:          the session, created in __cmd_record().
 * @progname:         argv[0] as passed to __cmd_record().
 * @realtime_prio:    when non-zero, the SCHED_FIFO priority requested via
 *                    sched_setscheduler() before recording starts.
 * @no_buildid:       skip build-id processing.
 * @no_buildid_cache: set from the "record.build-id" config key
 *                    ("cache" / "no-cache").
 * @samples:          bumped each time data is drained from a buffer or a
 *                    sample is processed; the main loop compares it across
 *                    iterations to detect whether anything was read.
 */
41 struct record {
42         struct perf_tool        tool;
43         struct record_opts      opts;
44         u64                     bytes_written;
45         struct perf_data_file   file;
46         struct auxtrace_record  *itr;
47         struct perf_evlist      *evlist;
48         struct perf_session     *session;
49         const char              *progname;
50         int                     realtime_prio;
51         bool                    no_buildid;
52         bool                    no_buildid_cache;
53         unsigned long long      samples;
54 };
55
56 static int record__write(struct record *rec, void *bf, size_t size)
57 {
58         if (perf_data_file__write(rec->session->file, bf, size) < 0) {
59                 pr_err("failed to write perf data, error: %m\n");
60                 return -1;
61         }
62
63         rec->bytes_written += size;
64         return 0;
65 }
66
67 static int process_synthesized_event(struct perf_tool *tool,
68                                      union perf_event *event,
69                                      struct perf_sample *sample __maybe_unused,
70                                      struct machine *machine __maybe_unused)
71 {
72         struct record *rec = container_of(tool, struct record, tool);
73         return record__write(rec, event, event->header.size);
74 }
75
76 static int record__mmap_read(struct record *rec, int idx)
77 {
78         struct perf_mmap *md = &rec->evlist->mmap[idx];
79         u64 head = perf_mmap__read_head(md);
80         u64 old = md->prev;
81         unsigned char *data = md->base + page_size;
82         unsigned long size;
83         void *buf;
84         int rc = 0;
85
86         if (old == head)
87                 return 0;
88
89         rec->samples++;
90
91         size = head - old;
92
93         if ((old & md->mask) + size != (head & md->mask)) {
94                 buf = &data[old & md->mask];
95                 size = md->mask + 1 - (old & md->mask);
96                 old += size;
97
98                 if (record__write(rec, buf, size) < 0) {
99                         rc = -1;
100                         goto out;
101                 }
102         }
103
104         buf = &data[old & md->mask];
105         size = head - old;
106         old += size;
107
108         if (record__write(rec, buf, size) < 0) {
109                 rc = -1;
110                 goto out;
111         }
112
113         md->prev = old;
114         perf_evlist__mmap_consume(rec->evlist, idx);
115 out:
116         return rc;
117 }
118
/*
 * Flags shared between the signal handlers and the recording loop.
 *
 * done:           set by sig_handler() and workload_exec_failed_signal();
 *                 tells the main loop in __cmd_record() to stop.
 * signr:          last non-SIGCHLD signal seen (or the signal that killed
 *                 the workload); -1 means none.  record__sig_exit()
 *                 re-raises it at exit.
 * child_finished: set on SIGCHLD or when the workload failed to start, so
 *                 __cmd_record() does not send SIGTERM to the workload.
 * auxtrace_snapshot_enabled / auxtrace_snapshot_err /
 * auxtrace_record__snapshot_started:
 *                 AUX area snapshot state used by the main loop and
 *                 record__read_auxtrace_snapshot().
 *                 NOTE(review): presumably also written by
 *                 snapshot_sig_handler(), which is not defined in this
 *                 part of the file - confirm.
 */
119 static volatile int done;
120 static volatile int signr = -1;
121 static volatile int child_finished;
122 static volatile int auxtrace_snapshot_enabled;
123 static volatile int auxtrace_snapshot_err;
124 static volatile int auxtrace_record__snapshot_started;
125
126 static void sig_handler(int sig)
127 {
128         if (sig == SIGCHLD)
129                 child_finished = 1;
130         else
131                 signr = sig;
132
133         done = 1;
134 }
135
136 static void record__sig_exit(void)
137 {
138         if (signr == -1)
139                 return;
140
141         signal(signr, SIG_DFL);
142         raise(signr);
143 }
144
145 #ifdef HAVE_AUXTRACE_SUPPORT
146
147 static int record__process_auxtrace(struct perf_tool *tool,
148                                     union perf_event *event, void *data1,
149                                     size_t len1, void *data2, size_t len2)
150 {
151         struct record *rec = container_of(tool, struct record, tool);
152         struct perf_data_file *file = &rec->file;
153         size_t padding;
154         u8 pad[8] = {0};
155
156         if (!perf_data_file__is_pipe(file)) {
157                 off_t file_offset;
158                 int fd = perf_data_file__fd(file);
159                 int err;
160
161                 file_offset = lseek(fd, 0, SEEK_CUR);
162                 if (file_offset == -1)
163                         return -1;
164                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
165                                                      event, file_offset);
166                 if (err)
167                         return err;
168         }
169
170         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
171         padding = (len1 + len2) & 7;
172         if (padding)
173                 padding = 8 - padding;
174
175         record__write(rec, event, event->header.size);
176         record__write(rec, data1, len1);
177         if (len2)
178                 record__write(rec, data2, len2);
179         record__write(rec, &pad, padding);
180
181         return 0;
182 }
183
184 static int record__auxtrace_mmap_read(struct record *rec,
185                                       struct auxtrace_mmap *mm)
186 {
187         int ret;
188
189         ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
190                                   record__process_auxtrace);
191         if (ret < 0)
192                 return ret;
193
194         if (ret)
195                 rec->samples++;
196
197         return 0;
198 }
199
200 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
201                                                struct auxtrace_mmap *mm)
202 {
203         int ret;
204
205         ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
206                                            record__process_auxtrace,
207                                            rec->opts.auxtrace_snapshot_size);
208         if (ret < 0)
209                 return ret;
210
211         if (ret)
212                 rec->samples++;
213
214         return 0;
215 }
216
217 static int record__auxtrace_read_snapshot_all(struct record *rec)
218 {
219         int i;
220         int rc = 0;
221
222         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
223                 struct auxtrace_mmap *mm =
224                                 &rec->evlist->mmap[i].auxtrace_mmap;
225
226                 if (!mm->base)
227                         continue;
228
229                 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
230                         rc = -1;
231                         goto out;
232                 }
233         }
234 out:
235         return rc;
236 }
237
238 static void record__read_auxtrace_snapshot(struct record *rec)
239 {
240         pr_debug("Recording AUX area tracing snapshot\n");
241         if (record__auxtrace_read_snapshot_all(rec) < 0) {
242                 auxtrace_snapshot_err = -1;
243         } else {
244                 auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
245                 if (!auxtrace_snapshot_err)
246                         auxtrace_snapshot_enabled = 1;
247         }
248 }
249
250 #else
251
252 static inline
253 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
254                                struct auxtrace_mmap *mm __maybe_unused)
255 {
256         return 0;
257 }
258
259 static inline
260 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
261 {
262 }
263
264 static inline
265 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
266 {
267         return 0;
268 }
269
270 #endif
271
272 static int record__open(struct record *rec)
273 {
274         char msg[512];
275         struct perf_evsel *pos;
276         struct perf_evlist *evlist = rec->evlist;
277         struct perf_session *session = rec->session;
278         struct record_opts *opts = &rec->opts;
279         int rc = 0;
280
281         perf_evlist__config(evlist, opts);
282
283         evlist__for_each(evlist, pos) {
284 try_again:
285                 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
286                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
287                                 if (verbose)
288                                         ui__warning("%s\n", msg);
289                                 goto try_again;
290                         }
291
292                         rc = -errno;
293                         perf_evsel__open_strerror(pos, &opts->target,
294                                                   errno, msg, sizeof(msg));
295                         ui__error("%s\n", msg);
296                         goto out;
297                 }
298         }
299
300         if (perf_evlist__apply_filters(evlist, &pos)) {
301                 error("failed to set filter \"%s\" on event %s with %d (%s)\n",
302                         pos->filter, perf_evsel__name(pos), errno,
303                         strerror_r(errno, msg, sizeof(msg)));
304                 rc = -1;
305                 goto out;
306         }
307
308         if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
309                                  opts->auxtrace_mmap_pages,
310                                  opts->auxtrace_snapshot_mode) < 0) {
311                 if (errno == EPERM) {
312                         pr_err("Permission error mapping pages.\n"
313                                "Consider increasing "
314                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
315                                "or try again with a smaller value of -m/--mmap_pages.\n"
316                                "(current value: %u,%u)\n",
317                                opts->mmap_pages, opts->auxtrace_mmap_pages);
318                         rc = -errno;
319                 } else {
320                         pr_err("failed to mmap with %d (%s)\n", errno,
321                                 strerror_r(errno, msg, sizeof(msg)));
322                         rc = -errno;
323                 }
324                 goto out;
325         }
326
327         session->evlist = evlist;
328         perf_session__set_id_hdr_size(session);
329 out:
330         return rc;
331 }
332
333 static int process_sample_event(struct perf_tool *tool,
334                                 union perf_event *event,
335                                 struct perf_sample *sample,
336                                 struct perf_evsel *evsel,
337                                 struct machine *machine)
338 {
339         struct record *rec = container_of(tool, struct record, tool);
340
341         rec->samples++;
342
343         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
344 }
345
346 static int process_buildids(struct record *rec)
347 {
348         struct perf_data_file *file  = &rec->file;
349         struct perf_session *session = rec->session;
350
351         if (file->size == 0)
352                 return 0;
353
354         /*
355          * During this process, it'll load kernel map and replace the
356          * dso->long_name to a real pathname it found.  In this case
357          * we prefer the vmlinux path like
358          *   /lib/modules/3.16.4/build/vmlinux
359          *
360          * rather than build-id path (in debug directory).
361          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
362          */
363         symbol_conf.ignore_vmlinux_buildid = true;
364
365         return perf_session__process_events(session);
366 }
367
368 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
369 {
370         int err;
371         struct perf_tool *tool = data;
372         /*
373          *As for guest kernel when processing subcommand record&report,
374          *we arrange module mmap prior to guest kernel mmap and trigger
375          *a preload dso because default guest module symbols are loaded
376          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
377          *method is used to avoid symbol missing when the first addr is
378          *in module instead of in guest kernel.
379          */
380         err = perf_event__synthesize_modules(tool, process_synthesized_event,
381                                              machine);
382         if (err < 0)
383                 pr_err("Couldn't record guest kernel [%d]'s reference"
384                        " relocation symbol.\n", machine->pid);
385
386         /*
387          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
388          * have no _text sometimes.
389          */
390         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
391                                                  machine);
392         if (err < 0)
393                 pr_err("Couldn't record guest kernel [%d]'s reference"
394                        " relocation symbol.\n", machine->pid);
395 }
396
/*
 * Header-only PERF_RECORD_FINISHED_ROUND event.  record__mmap_read_all()
 * writes it after a pass over the mmaps that produced at least one byte
 * of output.
 */
397 static struct perf_event_header finished_round_event = {
398         .size = sizeof(struct perf_event_header),
399         .type = PERF_RECORD_FINISHED_ROUND,
400 };
401
402 static int record__mmap_read_all(struct record *rec)
403 {
404         u64 bytes_written = rec->bytes_written;
405         int i;
406         int rc = 0;
407
408         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
409                 struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
410
411                 if (rec->evlist->mmap[i].base) {
412                         if (record__mmap_read(rec, i) != 0) {
413                                 rc = -1;
414                                 goto out;
415                         }
416                 }
417
418                 if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
419                     record__auxtrace_mmap_read(rec, mm) != 0) {
420                         rc = -1;
421                         goto out;
422                 }
423         }
424
425         /*
426          * Mark the round finished in case we wrote
427          * at least one event.
428          */
429         if (bytes_written != rec->bytes_written)
430                 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
431
432 out:
433         return rc;
434 }
435
436 static void record__init_features(struct record *rec)
437 {
438         struct perf_session *session = rec->session;
439         int feat;
440
441         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
442                 perf_header__set_feat(&session->header, feat);
443
444         if (rec->no_buildid)
445                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
446
447         if (!have_tracepoints(&rec->evlist->entries))
448                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
449
450         if (!rec->opts.branch_stack)
451                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
452
453         if (!rec->opts.full_auxtrace)
454                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
455
456         perf_header__clear_feat(&session->header, HEADER_STAT);
457 }
458
459 static volatile int workload_exec_errno;
460
461 /*
462  * perf_evlist__prepare_workload will send a SIGUSR1
463  * if the fork fails, since we asked by setting its
464  * want_signal to true.
465  */
466 static void workload_exec_failed_signal(int signo __maybe_unused,
467                                         siginfo_t *info,
468                                         void *ucontext __maybe_unused)
469 {
470         workload_exec_errno = info->si_value.sival_int;
471         done = 1;
472         child_finished = 1;
473 }
474
475 static void snapshot_sig_handler(int sig);
476
477 static int __cmd_record(struct record *rec, int argc, const char **argv)
478 {
479         int err;
480         int status = 0;
481         unsigned long waking = 0;
482         const bool forks = argc > 0;
483         struct machine *machine;
484         struct perf_tool *tool = &rec->tool;
485         struct record_opts *opts = &rec->opts;
486         struct perf_data_file *file = &rec->file;
487         struct perf_session *session;
488         bool disabled = false, draining = false;
489         int fd;
490
491         rec->progname = argv[0];
492
493         atexit(record__sig_exit);
494         signal(SIGCHLD, sig_handler);
495         signal(SIGINT, sig_handler);
496         signal(SIGTERM, sig_handler);
497         if (rec->opts.auxtrace_snapshot_mode)
498                 signal(SIGUSR2, snapshot_sig_handler);
499         else
500                 signal(SIGUSR2, SIG_IGN);
501
502         session = perf_session__new(file, false, tool);
503         if (session == NULL) {
504                 pr_err("Perf session creation failed.\n");
505                 return -1;
506         }
507
508         fd = perf_data_file__fd(file);
509         rec->session = session;
510
511         record__init_features(rec);
512
513         if (forks) {
514                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
515                                                     argv, file->is_pipe,
516                                                     workload_exec_failed_signal);
517                 if (err < 0) {
518                         pr_err("Couldn't run the workload!\n");
519                         status = err;
520                         goto out_delete_session;
521                 }
522         }
523
524         if (record__open(rec) != 0) {
525                 err = -1;
526                 goto out_child;
527         }
528
529         /*
530          * Normally perf_session__new would do this, but it doesn't have the
531          * evlist.
532          */
533         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
534                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
535                 rec->tool.ordered_events = false;
536         }
537
538         if (!rec->evlist->nr_groups)
539                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
540
541         if (file->is_pipe) {
542                 err = perf_header__write_pipe(fd);
543                 if (err < 0)
544                         goto out_child;
545         } else {
546                 err = perf_session__write_header(session, rec->evlist, fd, false);
547                 if (err < 0)
548                         goto out_child;
549         }
550
551         if (!rec->no_buildid
552             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
553                 pr_err("Couldn't generate buildids. "
554                        "Use --no-buildid to profile anyway.\n");
555                 err = -1;
556                 goto out_child;
557         }
558
559         machine = &session->machines.host;
560
561         if (file->is_pipe) {
562                 err = perf_event__synthesize_attrs(tool, session,
563                                                    process_synthesized_event);
564                 if (err < 0) {
565                         pr_err("Couldn't synthesize attrs.\n");
566                         goto out_child;
567                 }
568
569                 if (have_tracepoints(&rec->evlist->entries)) {
570                         /*
571                          * FIXME err <= 0 here actually means that
572                          * there were no tracepoints so its not really
573                          * an error, just that we don't need to
574                          * synthesize anything.  We really have to
575                          * return this more properly and also
576                          * propagate errors that now are calling die()
577                          */
578                         err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
579                                                                   process_synthesized_event);
580                         if (err <= 0) {
581                                 pr_err("Couldn't record tracing data.\n");
582                                 goto out_child;
583                         }
584                         rec->bytes_written += err;
585                 }
586         }
587
588         if (rec->opts.full_auxtrace) {
589                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
590                                         session, process_synthesized_event);
591                 if (err)
592                         goto out_delete_session;
593         }
594
595         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
596                                                  machine);
597         if (err < 0)
598                 pr_err("Couldn't record kernel reference relocation symbol\n"
599                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
600                        "Check /proc/kallsyms permission or run as root.\n");
601
602         err = perf_event__synthesize_modules(tool, process_synthesized_event,
603                                              machine);
604         if (err < 0)
605                 pr_err("Couldn't record kernel module information.\n"
606                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
607                        "Check /proc/modules permission or run as root.\n");
608
609         if (perf_guest) {
610                 machines__process_guests(&session->machines,
611                                          perf_event__synthesize_guest_os, tool);
612         }
613
614         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
615                                             process_synthesized_event, opts->sample_address,
616                                             opts->proc_map_timeout);
617         if (err != 0)
618                 goto out_child;
619
620         if (rec->realtime_prio) {
621                 struct sched_param param;
622
623                 param.sched_priority = rec->realtime_prio;
624                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
625                         pr_err("Could not set realtime priority.\n");
626                         err = -1;
627                         goto out_child;
628                 }
629         }
630
631         /*
632          * When perf is starting the traced process, all the events
633          * (apart from group members) have enable_on_exec=1 set,
634          * so don't spoil it by prematurely enabling them.
635          */
636         if (!target__none(&opts->target) && !opts->initial_delay)
637                 perf_evlist__enable(rec->evlist);
638
639         /*
640          * Let the child rip
641          */
642         if (forks) {
643                 union perf_event *event;
644
645                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
646                 if (event == NULL) {
647                         err = -ENOMEM;
648                         goto out_child;
649                 }
650
651                 /*
652                  * Some H/W events are generated before COMM event
653                  * which is emitted during exec(), so perf script
654                  * cannot see a correct process name for those events.
655                  * Synthesize COMM event to prevent it.
656                  */
657                 perf_event__synthesize_comm(tool, event,
658                                             rec->evlist->workload.pid,
659                                             process_synthesized_event,
660                                             machine);
661                 free(event);
662
663                 perf_evlist__start_workload(rec->evlist);
664         }
665
666         if (opts->initial_delay) {
667                 usleep(opts->initial_delay * 1000);
668                 perf_evlist__enable(rec->evlist);
669         }
670
671         auxtrace_snapshot_enabled = 1;
672         for (;;) {
673                 unsigned long long hits = rec->samples;
674
675                 if (record__mmap_read_all(rec) < 0) {
676                         auxtrace_snapshot_enabled = 0;
677                         err = -1;
678                         goto out_child;
679                 }
680
681                 if (auxtrace_record__snapshot_started) {
682                         auxtrace_record__snapshot_started = 0;
683                         if (!auxtrace_snapshot_err)
684                                 record__read_auxtrace_snapshot(rec);
685                         if (auxtrace_snapshot_err) {
686                                 pr_err("AUX area tracing snapshot failed\n");
687                                 err = -1;
688                                 goto out_child;
689                         }
690                 }
691
692                 if (hits == rec->samples) {
693                         if (done || draining)
694                                 break;
695                         err = perf_evlist__poll(rec->evlist, -1);
696                         /*
697                          * Propagate error, only if there's any. Ignore positive
698                          * number of returned events and interrupt error.
699                          */
700                         if (err > 0 || (err < 0 && errno == EINTR))
701                                 err = 0;
702                         waking++;
703
704                         if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
705                                 draining = true;
706                 }
707
708                 /*
709                  * When perf is starting the traced process, at the end events
710                  * die with the process and we wait for that. Thus no need to
711                  * disable events in this case.
712                  */
713                 if (done && !disabled && !target__none(&opts->target)) {
714                         auxtrace_snapshot_enabled = 0;
715                         perf_evlist__disable(rec->evlist);
716                         disabled = true;
717                 }
718         }
719         auxtrace_snapshot_enabled = 0;
720
721         if (forks && workload_exec_errno) {
722                 char msg[STRERR_BUFSIZE];
723                 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
724                 pr_err("Workload failed: %s\n", emsg);
725                 err = -1;
726                 goto out_child;
727         }
728
729         if (!quiet)
730                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
731
732 out_child:
733         if (forks) {
734                 int exit_status;
735
736                 if (!child_finished)
737                         kill(rec->evlist->workload.pid, SIGTERM);
738
739                 wait(&exit_status);
740
741                 if (err < 0)
742                         status = err;
743                 else if (WIFEXITED(exit_status))
744                         status = WEXITSTATUS(exit_status);
745                 else if (WIFSIGNALED(exit_status))
746                         signr = WTERMSIG(exit_status);
747         } else
748                 status = err;
749
750         /* this will be recalculated during process_buildids() */
751         rec->samples = 0;
752
753         if (!err && !file->is_pipe) {
754                 rec->session->header.data_size += rec->bytes_written;
755                 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
756
757                 if (!rec->no_buildid) {
758                         process_buildids(rec);
759                         /*
760                          * We take all buildids when the file contains
761                          * AUX area tracing data because we do not decode the
762                          * trace because it would take too long.
763                          */
764                         if (rec->opts.full_auxtrace)
765                                 dsos__hit_all(rec->session);
766                 }
767                 perf_session__write_header(rec->session, rec->evlist, fd, true);
768         }
769
770         if (!err && !quiet) {
771                 char samples[128];
772
773                 if (rec->samples && !rec->opts.full_auxtrace)
774                         scnprintf(samples, sizeof(samples),
775                                   " (%" PRIu64 " samples)", rec->samples);
776                 else
777                         samples[0] = '\0';
778
779                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
780                         perf_data_file__size(file) / 1024.0 / 1024.0,
781                         file->path, samples);
782         }
783
784 out_delete_session:
785         perf_session__delete(session);
786         return status;
787 }
788
789 static void callchain_debug(void)
790 {
791         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
792
793         pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
794
795         if (callchain_param.record_mode == CALLCHAIN_DWARF)
796                 pr_debug("callchain: stack dump size %d\n",
797                          callchain_param.dump_size);
798 }
799
800 int record_parse_callchain_opt(const struct option *opt,
801                                const char *arg,
802                                int unset)
803 {
804         int ret;
805         struct record_opts *record = (struct record_opts *)opt->value;
806
807         record->callgraph_set = true;
808         callchain_param.enabled = !unset;
809
810         /* --no-call-graph */
811         if (unset) {
812                 callchain_param.record_mode = CALLCHAIN_NONE;
813                 pr_debug("callchain: disabled\n");
814                 return 0;
815         }
816
817         ret = parse_callchain_record_opt(arg, &callchain_param);
818         if (!ret) {
819                 /* Enable data address sampling for DWARF unwind. */
820                 if (callchain_param.record_mode == CALLCHAIN_DWARF)
821                         record->sample_address = true;
822                 callchain_debug();
823         }
824
825         return ret;
826 }
827
828 int record_callchain_opt(const struct option *opt,
829                          const char *arg __maybe_unused,
830                          int unset __maybe_unused)
831 {
832         struct record_opts *record = (struct record_opts *)opt->value;
833
834         record->callgraph_set = true;
835         callchain_param.enabled = true;
836
837         if (callchain_param.record_mode == CALLCHAIN_NONE)
838                 callchain_param.record_mode = CALLCHAIN_FP;
839
840         callchain_debug();
841         return 0;
842 }
843
844 static int perf_record_config(const char *var, const char *value, void *cb)
845 {
846         struct record *rec = cb;
847
848         if (!strcmp(var, "record.build-id")) {
849                 if (!strcmp(value, "cache"))
850                         rec->no_buildid_cache = false;
851                 else if (!strcmp(value, "no-cache"))
852                         rec->no_buildid_cache = true;
853                 else if (!strcmp(value, "skip"))
854                         rec->no_buildid = true;
855                 else
856                         return -1;
857                 return 0;
858         }
859         if (!strcmp(var, "record.call-graph"))
860                 var = "call-graph.record-mode"; /* fall-through */
861
862         return perf_default_config(var, value, cb);
863 }
864
/* One row of a clock lookup table: a textual name and the clockid it selects. */
struct clockid_map {
	const char *name;	/* NULL in the terminating row */
	int clockid;
};

/* Table row mapping the name @n to the clockid @c. */
#define CLOCKID_MAP(n, c) \
	{ .name = n, .clockid = (c), }

/* Table terminator: a row whose name is NULL. */
#define CLOCKID_END \
	{ .name = NULL, }
874
875
876 /*
877  * Add the missing ones, we need to build on many distros...
878  */
879 #ifndef CLOCK_MONOTONIC_RAW
880 #define CLOCK_MONOTONIC_RAW 4
881 #endif
882 #ifndef CLOCK_BOOTTIME
883 #define CLOCK_BOOTTIME 7
884 #endif
885 #ifndef CLOCK_TAI
886 #define CLOCK_TAI 11
887 #endif
888
889 static const struct clockid_map clockids[] = {
890         /* available for all events, NMI safe */
891         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
892         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
893
894         /* available for some events */
895         CLOCKID_MAP("realtime", CLOCK_REALTIME),
896         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
897         CLOCKID_MAP("tai", CLOCK_TAI),
898
899         /* available for the lazy */
900         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
901         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
902         CLOCKID_MAP("real", CLOCK_REALTIME),
903         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
904
905         CLOCKID_END,
906 };
907
908 static int parse_clockid(const struct option *opt, const char *str, int unset)
909 {
910         struct record_opts *opts = (struct record_opts *)opt->value;
911         const struct clockid_map *cm;
912         const char *ostr = str;
913
914         if (unset) {
915                 opts->use_clockid = 0;
916                 return 0;
917         }
918
919         /* no arg passed */
920         if (!str)
921                 return 0;
922
923         /* no setting it twice */
924         if (opts->use_clockid)
925                 return -1;
926
927         opts->use_clockid = true;
928
929         /* if its a number, we're done */
930         if (sscanf(str, "%d", &opts->clockid) == 1)
931                 return 0;
932
933         /* allow a "CLOCK_" prefix to the name */
934         if (!strncasecmp(str, "CLOCK_", 6))
935                 str += 6;
936
937         for (cm = clockids; cm->name; cm++) {
938                 if (!strcasecmp(str, cm->name)) {
939                         opts->clockid = cm->clockid;
940                         return 0;
941                 }
942         }
943
944         opts->use_clockid = false;
945         ui__warning("unknown clockid %s, check man page\n", ostr);
946         return -1;
947 }
948
949 static int record__parse_mmap_pages(const struct option *opt,
950                                     const char *str,
951                                     int unset __maybe_unused)
952 {
953         struct record_opts *opts = opt->value;
954         char *s, *p;
955         unsigned int mmap_pages;
956         int ret;
957
958         if (!str)
959                 return -EINVAL;
960
961         s = strdup(str);
962         if (!s)
963                 return -ENOMEM;
964
965         p = strchr(s, ',');
966         if (p)
967                 *p = '\0';
968
969         if (*s) {
970                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
971                 if (ret)
972                         goto out_free;
973                 opts->mmap_pages = mmap_pages;
974         }
975
976         if (!p) {
977                 ret = 0;
978                 goto out_free;
979         }
980
981         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
982         if (ret)
983                 goto out_free;
984
985         opts->auxtrace_mmap_pages = mmap_pages;
986
987 out_free:
988         free(s);
989         return ret;
990 }
991
/* NULL-terminated usage synopsis lines for 'perf record'. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Exported handle on the usage lines above. */
const char * const *record_usage = &__record_usage[0];
998
999 /*
1000  * XXX Ideally would be local to cmd_record() and passed to a record__new
1001  * because we need to have access to it in record__exit, that is called
1002  * after cmd_record() exits, but since record_options need to be accessible to
1003  * builtin-script, leave it here.
1004  *
1005  * At least we don't touch it in all the other functions here directly.
1006  *
1007  * Just say no to tons of global variables, sigh.
1008  */
1009 static struct record record = {
1010         .opts = {
1011                 .sample_time         = true,
1012                 .mmap_pages          = UINT_MAX,
1013                 .user_freq           = UINT_MAX,
1014                 .user_interval       = ULLONG_MAX,
1015                 .freq                = 4000,
1016                 .target              = {
1017                         .uses_mmap   = true,
1018                         .default_per_cpu = true,
1019                 },
1020                 .proc_map_timeout     = 500,
1021         },
1022         .tool = {
1023                 .sample         = process_sample_event,
1024                 .fork           = perf_event__process_fork,
1025                 .exit           = perf_event__process_exit,
1026                 .comm           = perf_event__process_comm,
1027                 .mmap           = perf_event__process_mmap,
1028                 .mmap2          = perf_event__process_mmap2,
1029                 .ordered_events = true,
1030         },
1031 };
1032
1033 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1034         "\n\t\t\t\tDefault: fp";
1035
1036 /*
1037  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1038  * with it and switch to use the library functions in perf_evlist that came
1039  * from builtin-record.c, i.e. use record_opts,
1040  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1041  * using pipes, etc.
1042  */
1043 struct option __record_options[] = {
1044         OPT_CALLBACK('e', "event", &record.evlist, "event",
1045                      "event selector. use 'perf list' to list available events",
1046                      parse_events_option),
1047         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1048                      "event filter", parse_filter),
1049         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1050                            NULL, "don't record events from perf itself",
1051                            exclude_perf),
1052         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1053                     "record events on existing process id"),
1054         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1055                     "record events on existing thread id"),
1056         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1057                     "collect data with this RT SCHED_FIFO priority"),
1058         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1059                     "collect data without buffering"),
1060         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1061                     "collect raw sample records from all opened counters"),
1062         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1063                             "system-wide collection from all CPUs"),
1064         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1065                     "list of cpus to monitor"),
1066         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1067         OPT_STRING('o', "output", &record.file.path, "file",
1068                     "output file name"),
1069         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1070                         &record.opts.no_inherit_set,
1071                         "child tasks do not inherit counters"),
1072         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1073         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1074                      "number of mmap data pages and AUX area tracing mmap pages",
1075                      record__parse_mmap_pages),
1076         OPT_BOOLEAN(0, "group", &record.opts.group,
1077                     "put the counters into a counter group"),
1078         OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
1079                            NULL, "enables call-graph recording" ,
1080                            &record_callchain_opt),
1081         OPT_CALLBACK(0, "call-graph", &record.opts,
1082                      "record_mode[,record_size]", record_callchain_help,
1083                      &record_parse_callchain_opt),
1084         OPT_INCR('v', "verbose", &verbose,
1085                     "be more verbose (show counter open errors, etc)"),
1086         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1087         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1088                     "per thread counts"),
1089         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1090         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1091                         &record.opts.sample_time_set,
1092                         "Record the sample timestamps"),
1093         OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1094         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1095                     "don't sample"),
1096         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
1097                     "do not update the buildid cache"),
1098         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
1099                     "do not collect buildids in perf.data"),
1100         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1101                      "monitor event in cgroup name only",
1102                      parse_cgroups),
1103         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1104                   "ms to wait before starting measurement after program start"),
1105         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1106                    "user to profile"),
1107
1108         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1109                      "branch any", "sample any taken branches",
1110                      parse_branch_stack),
1111
1112         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1113                      "branch filter mask", "branch stack filter modes",
1114                      parse_branch_stack),
1115         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1116                     "sample by weight (on special events only)"),
1117         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1118                     "sample transaction flags (special events only)"),
1119         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1120                     "use per-thread mmaps"),
1121         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1122                     "sample selected machine registers on interrupt,"
1123                     " use -I ? to list register names", parse_regs),
1124         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1125                     "Record running/enabled time of read (:S) events"),
1126         OPT_CALLBACK('k', "clockid", &record.opts,
1127         "clockid", "clockid to use for events, see clock_gettime()",
1128         parse_clockid),
1129         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1130                           "opts", "AUX area tracing Snapshot Mode", ""),
1131         OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1132                         "per thread proc mmap processing timeout in ms"),
1133         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1134                     "Record context switch events"),
1135         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1136                    "clang binary to use for compiling BPF scriptlets"),
1137         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1138                    "options passed to clang when compiling BPF scriptlets"),
1139         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1140                    "file", "vmlinux pathname"),
1141         OPT_END()
1142 };
1143
1144 struct option *record_options = __record_options;
1145
1146 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1147 {
1148         int err;
1149         struct record *rec = &record;
1150         char errbuf[BUFSIZ];
1151
1152 #ifndef HAVE_LIBBPF_SUPPORT
1153 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1154         set_nobuild('\0', "clang-path", true);
1155         set_nobuild('\0', "clang-opt", true);
1156 # undef set_nobuild
1157 #endif
1158
1159 #ifndef HAVE_BPF_PROLOGUE
1160 # if !defined (HAVE_DWARF_SUPPORT)
1161 #  define REASON  "NO_DWARF=1"
1162 # elif !defined (HAVE_LIBBPF_SUPPORT)
1163 #  define REASON  "NO_LIBBPF=1"
1164 # else
1165 #  define REASON  "this architecture doesn't support BPF prologue"
1166 # endif
1167 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1168         set_nobuild('\0', "vmlinux", true);
1169 # undef set_nobuild
1170 # undef REASON
1171 #endif
1172
1173         rec->evlist = perf_evlist__new();
1174         if (rec->evlist == NULL)
1175                 return -ENOMEM;
1176
1177         perf_config(perf_record_config, rec);
1178
1179         argc = parse_options(argc, argv, record_options, record_usage,
1180                             PARSE_OPT_STOP_AT_NON_OPTION);
1181         if (!argc && target__none(&rec->opts.target))
1182                 usage_with_options(record_usage, record_options);
1183
1184         if (nr_cgroups && !rec->opts.target.system_wide) {
1185                 usage_with_options_msg(record_usage, record_options,
1186                         "cgroup monitoring only available in system-wide mode");
1187
1188         }
1189         if (rec->opts.record_switch_events &&
1190             !perf_can_record_switch_events()) {
1191                 ui__error("kernel does not support recording context switch events\n");
1192                 parse_options_usage(record_usage, record_options, "switch-events", 0);
1193                 return -EINVAL;
1194         }
1195
1196         if (!rec->itr) {
1197                 rec->itr = auxtrace_record__init(rec->evlist, &err);
1198                 if (err)
1199                         return err;
1200         }
1201
1202         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1203                                               rec->opts.auxtrace_snapshot_opts);
1204         if (err)
1205                 return err;
1206
1207         err = -ENOMEM;
1208
1209         symbol__init(NULL);
1210
1211         if (symbol_conf.kptr_restrict)
1212                 pr_warning(
1213 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1214 "check /proc/sys/kernel/kptr_restrict.\n\n"
1215 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1216 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1217 "Samples in kernel modules won't be resolved at all.\n\n"
1218 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1219 "even with a suitable vmlinux or kallsyms file.\n\n");
1220
1221         if (rec->no_buildid_cache || rec->no_buildid)
1222                 disable_buildid_cache();
1223
1224         if (rec->evlist->nr_entries == 0 &&
1225             perf_evlist__add_default(rec->evlist) < 0) {
1226                 pr_err("Not enough memory for event selector list\n");
1227                 goto out_symbol_exit;
1228         }
1229
1230         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1231                 rec->opts.no_inherit = true;
1232
1233         err = target__validate(&rec->opts.target);
1234         if (err) {
1235                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1236                 ui__warning("%s", errbuf);
1237         }
1238
1239         err = target__parse_uid(&rec->opts.target);
1240         if (err) {
1241                 int saved_errno = errno;
1242
1243                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1244                 ui__error("%s", errbuf);
1245
1246                 err = -saved_errno;
1247                 goto out_symbol_exit;
1248         }
1249
1250         err = -ENOMEM;
1251         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1252                 usage_with_options(record_usage, record_options);
1253
1254         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1255         if (err)
1256                 goto out_symbol_exit;
1257
1258         if (record_opts__config(&rec->opts)) {
1259                 err = -EINVAL;
1260                 goto out_symbol_exit;
1261         }
1262
1263         err = __cmd_record(&record, argc, argv);
1264 out_symbol_exit:
1265         perf_evlist__delete(rec->evlist);
1266         symbol__exit();
1267         auxtrace_record__free(rec->itr);
1268         return err;
1269 }
1270
1271 static void snapshot_sig_handler(int sig __maybe_unused)
1272 {
1273         if (!auxtrace_snapshot_enabled)
1274                 return;
1275         auxtrace_snapshot_enabled = 0;
1276         auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
1277         auxtrace_record__snapshot_started = 1;
1278 }