1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/symbol.h"
26 #include "util/cpumap.h"
27 #include "util/thread_map.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
33 enum write_mode_t {
34         WRITE_FORCE,
35         WRITE_APPEND
36 };
37
38 static u64                      user_interval                   = ULLONG_MAX;
39 static u64                      default_interval                =      0;
40
41 static unsigned int             page_size;
42 static unsigned int             mmap_pages                      = UINT_MAX;
43 static unsigned int             user_freq                       = UINT_MAX;
44 static int                      freq                            =   1000;
45 static int                      output;
46 static int                      pipe_output                     =      0;
47 static const char               *output_name                    = NULL;
48 static bool                     group                           =  false;
49 static int                      realtime_prio                   =      0;
50 static bool                     nodelay                         =  false;
51 static bool                     raw_samples                     =  false;
52 static bool                     sample_id_all_avail             =   true;
53 static bool                     system_wide                     =  false;
54 static pid_t                    target_pid                      =     -1;
55 static pid_t                    target_tid                      =     -1;
56 static pid_t                    child_pid                       =     -1;
57 static bool                     no_inherit                      =  false;
58 static enum write_mode_t        write_mode                      = WRITE_FORCE;
59 static bool                     call_graph                      =  false;
60 static bool                     inherit_stat                    =  false;
61 static bool                     no_samples                      =  false;
62 static bool                     sample_address                  =  false;
63 static bool                     sample_time                     =  false;
64 static bool                     no_buildid                      =  false;
65 static bool                     no_buildid_cache                =  false;
66 static struct perf_evlist       *evsel_list;
67
68 static long                     samples                         =      0;
69 static u64                      bytes_written                   =      0;
70
71 static int                      file_new                        =      1;
72 static off_t                    post_processing_offset;
73
74 static struct perf_session      *session;
75 static const char               *cpu_list;
76 static const char               *progname;
77
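/*
 * Account for bytes that someone else already wrote directly to the output
 * fd (e.g. perf_event__synthesize_tracing_data() below), so the header's
 * data_size stays in sync with what actually landed in the file.
 */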
78 static void advance_output(size_t size)
79 {
80         bytes_written += size;
81 }
82
83 static void write_output(void *buf, size_t size)
84 {
85         while (size) {
86                 int ret = write(output, buf, size);
87
88                 if (ret < 0)
89                         die("failed to write");
90
91                 size -= ret;
92                 buf += ret;
93
94                 bytes_written += ret;
95         }
96 }
97
98 static int process_synthesized_event(union perf_event *event,
99                                      struct perf_sample *sample __used,
100                                      struct perf_session *self __used)
101 {
102         write_output(event, event->header.size);
103         return 0;
104 }
105
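/*
 * Drain one mmap'ed ring buffer into the output file.  The kernel advances
 * the head as it produces events; we consume from our saved tail (md->prev).
 * When the new data wraps past the end of the buffer it is written out in
 * two chunks.  Rough illustration (numbers made up): with mask = 0xffff, a
 * tail of 0xff00 and a head of 0x10100, the first write covers the 0x100
 * bytes up to the end of the buffer and the second the 0x100 bytes that
 * wrapped around to the start.
 */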
106 static void mmap_read(struct perf_mmap *md)
107 {
108         unsigned int head = perf_mmap__read_head(md);
109         unsigned int old = md->prev;
110         unsigned char *data = md->base + page_size;
111         unsigned long size;
112         void *buf;
113
114         if (old == head)
115                 return;
116
117         samples++;
118
119         size = head - old;
120
121         if ((old & md->mask) + size != (head & md->mask)) {
122                 buf = &data[old & md->mask];
123                 size = md->mask + 1 - (old & md->mask);
124                 old += size;
125
126                 write_output(buf, size);
127         }
128
129         buf = &data[old & md->mask];
130         size = head - old;
131         old += size;
132
133         write_output(buf, size);
134
135         md->prev = old;
136         perf_mmap__write_tail(md, old);
137 }
138
139 static volatile int done = 0;
140 static volatile int signr = -1;
141 static volatile int child_finished = 0;
142
143 static void sig_handler(int sig)
144 {
145         if (sig == SIGCHLD)
146                 child_finished = 1;
147
148         done = 1;
149         signr = sig;
150 }
151
152 static void sig_atexit(void)
153 {
154         int status;
155
156         if (child_pid > 0) {
157                 if (!child_finished)
158                         kill(child_pid, SIGTERM);
159
160                 wait(&status);
161                 if (WIFSIGNALED(status))
162                         psignal(WTERMSIG(status), progname);
163         }
164
165         if (signr == -1 || signr == SIGUSR1)
166                 return;
167
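        /*
         * Restore the default handler and re-raise the signal so the process
         * terminates with the signal's usual disposition (and exit status)
         * instead of swallowing it here.
         */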
168         signal(signr, SIG_DFL);
169         kill(getpid(), signr);
170 }
171
172 static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
173 {
174         struct perf_event_attr *attr = &evsel->attr;
175         int track = !evsel->idx; /* only the first counter needs these */
176
177         attr->disabled          = 1;
178         attr->inherit           = !no_inherit;
179         attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
180                                   PERF_FORMAT_TOTAL_TIME_RUNNING |
181                                   PERF_FORMAT_ID;
182
183         attr->sample_type       |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
184
185         if (evlist->nr_entries > 1)
186                 attr->sample_type |= PERF_SAMPLE_ID;
187
188         /*
189          * We default some events to a sample period of 1, but keep
190          * that a weak assumption, overridable by the user.
191          */
192         if (!attr->sample_period || (user_freq != UINT_MAX &&
193                                      user_interval != ULLONG_MAX)) {
194                 if (freq) {
195                         attr->sample_type       |= PERF_SAMPLE_PERIOD;
196                         attr->freq              = 1;
197                         attr->sample_freq       = freq;
198                 } else {
199                         attr->sample_period = default_interval;
200                 }
201         }
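        /*
         * In practice: 'perf record -F 1000 ...' ends up here with
         * freq == 1000 and samples in frequency mode, while
         * 'perf record -c 100000 ...' ends up with a fixed period of
         * 100000 events (the numbers are only examples).
         */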
202
203         if (no_samples)
204                 attr->sample_freq = 0;
205
206         if (inherit_stat)
207                 attr->inherit_stat = 1;
208
209         if (sample_address) {
210                 attr->sample_type       |= PERF_SAMPLE_ADDR;
211                 attr->mmap_data = track;
212         }
213
214         if (call_graph)
215                 attr->sample_type       |= PERF_SAMPLE_CALLCHAIN;
216
217         if (system_wide)
218                 attr->sample_type       |= PERF_SAMPLE_CPU;
219
220         if (sample_id_all_avail &&
221             (sample_time || system_wide || !no_inherit || cpu_list))
222                 attr->sample_type       |= PERF_SAMPLE_TIME;
223
224         if (raw_samples) {
225                 attr->sample_type       |= PERF_SAMPLE_TIME;
226                 attr->sample_type       |= PERF_SAMPLE_RAW;
227                 attr->sample_type       |= PERF_SAMPLE_CPU;
228         }
229
230         if (nodelay) {
231                 attr->watermark = 0;
232                 attr->wakeup_events = 1;
233         }
234
235         attr->mmap              = track;
236         attr->comm              = track;
237
238         if (target_pid == -1 && target_tid == -1 && !system_wide) {
239                 attr->disabled = 1;
240                 attr->enable_on_exec = 1;
241         }
242 }
243
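/*
 * Used when appending (-A) to an existing perf.data: the freshly parsed
 * event list must match the one recorded in the file's header, otherwise
 * the appended data could not be interpreted and the append is refused.
 */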
244 static bool perf_evlist__equal(struct perf_evlist *evlist,
245                                struct perf_evlist *other)
246 {
247         struct perf_evsel *pos, *pair;
248
249         if (evlist->nr_entries != other->nr_entries)
250                 return false;
251
252         pair = list_entry(other->entries.next, struct perf_evsel, node);
253
254         list_for_each_entry(pos, &evlist->entries, node) {
255                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
256                         return false;
257                 pair = list_entry(pair->node.next, struct perf_evsel, node);
258         }
259
260         return true;
261 }
262
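/*
 * Configure and open one counter per event/cpu/thread combination.  Old or
 * limited kernels are handled by retrying: first without sample_id_all when
 * the attr is rejected with EINVAL, then by falling back from the hardware
 * cycles event to the software cpu-clock event when no PMU is available.
 */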
263 static void open_counters(struct perf_evlist *evlist)
264 {
265         struct perf_evsel *pos;
266
267         if (evlist->cpus->map[0] < 0)
268                 no_inherit = true;
269
270         list_for_each_entry(pos, &evlist->entries, node) {
271                 struct perf_event_attr *attr = &pos->attr;
272                 /*
273                  * Check if parse_single_tracepoint_event has already asked for
274                  * PERF_SAMPLE_TIME.
275                  *
276                  * XXX this is kludgy but short term fix for problems introduced by
277                  * eac23d1c that broke 'perf script' by having different sample_types
278                  * when using multiple tracepoint events when we use a perf binary
279                  * that tries to use sample_id_all on an older kernel.
280                  *
281                  * We need to move counter creation to perf_session, support
282                  * different sample_types, etc.
283                  */
284                 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
285
286                 config_attr(pos, evlist);
287 retry_sample_id:
288                 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
289 try_again:
290                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
291                         int err = errno;
292
293                         if (err == EPERM || err == EACCES) {
294                                 ui__warning_paranoid();
295                                 exit(EXIT_FAILURE);
296                         } else if (err == ENODEV && cpu_list) {
297                                 die("No such device - did you specify"
298                                         " an out-of-range profile CPU?\n");
299                         } else if (err == EINVAL && sample_id_all_avail) {
300                                 /*
301                                  * Old kernel, no attr->sample_id_all field
302                                  */
303                                 sample_id_all_avail = false;
304                                 if (!sample_time && !raw_samples && !time_needed)
305                                         attr->sample_type &= ~PERF_SAMPLE_TIME;
306
307                                 goto retry_sample_id;
308                         }
309
310                         /*
311                          * If it's cycles then fall back to hrtimer
312                          * based cpu-clock-tick sw counter, which
313                          * is always available even if no PMU support:
314                          */
315                         if (attr->type == PERF_TYPE_HARDWARE
316                                         && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
317
318                                 if (verbose)
319                                         ui__warning("The cycles event is not supported, "
320                                                     "trying to fall back to cpu-clock-ticks\n");
321                                 attr->type = PERF_TYPE_SOFTWARE;
322                                 attr->config = PERF_COUNT_SW_CPU_CLOCK;
323                                 goto try_again;
324                         }
325
326                         if (err == ENOENT) {
327                                 ui__warning("The %s event is not supported.\n",
328                                             event_name(pos));
329                                 exit(EXIT_FAILURE);
330                         }
331
332                         printf("\n");
333                         error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
334                               err, strerror(err));
335
336 #if defined(__i386__) || defined(__x86_64__)
337                         if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
338                                 die("No hardware sampling interrupt available."
339                                     " No APIC? If so then you can boot the kernel"
340                                     " with the \"lapic\" boot parameter to"
341                                     " force-enable it.\n");
342 #endif
343
344                         die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
345                 }
346         }
347
348         if (perf_evlist__set_filters(evlist)) {
349                 error("failed to set filter with %d (%s)\n", errno,
350                         strerror(errno));
351                 exit(-1);
352         }
353
354         if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
355                 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
356
357         if (file_new)
358                 session->evlist = evlist;
359         else {
360                 if (!perf_evlist__equal(session->evlist, evlist)) {
361                         fprintf(stderr, "incompatible append\n");
362                         exit(-1);
363                 }
364         }
365
366         perf_session__update_sample_type(session);
367 }
368
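/*
 * Re-read the events we just recorded (everything after
 * post_processing_offset) so build_id__mark_dso_hit_ops can mark the DSOs
 * that actually got samples; the header can then record build-ids for them.
 */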
369 static int process_buildids(void)
370 {
371         u64 size = lseek(output, 0, SEEK_CUR);
372
373         if (size == 0)
374                 return 0;
375
376         session->fd = output;
377         return __perf_session__process_events(session, post_processing_offset,
378                                               size - post_processing_offset,
379                                               size, &build_id__mark_dso_hit_ops);
380 }
381
382 static void atexit_header(void)
383 {
384         if (!pipe_output) {
385                 session->header.data_size += bytes_written;
386
387                 if (!no_buildid)
388                         process_buildids();
389                 perf_session__write_header(session, evsel_list, output, true);
390                 perf_session__delete(session);
391                 perf_evlist__delete(evsel_list);
392                 symbol__exit();
393         }
394 }
395
396 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
397 {
398         int err;
399         struct perf_session *psession = data;
400
401         if (machine__is_host(machine))
402                 return;
403
404         /*
405          * For the guest kernel, when processing the record & report
406          * subcommands, we emit module mmap events before the guest kernel
407          * mmap and trigger a DSO preload, because guest module symbols are
408          * by default loaded from guest kallsyms instead of from
409          * /lib/modules/XXX/XXX. This avoids missing symbols when the first
410          * sampled address falls in a module rather than in the guest kernel.
411          */
412         err = perf_event__synthesize_modules(process_synthesized_event,
413                                              psession, machine);
414         if (err < 0)
415                 pr_err("Couldn't record guest kernel [%d]'s reference"
416                        " relocation symbol.\n", machine->pid);
417
418         /*
419          * We use _stext for the guest kernel because the guest kernel's
420          * /proc/kallsyms sometimes has no _text.
421          */
422         err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
423                                                  psession, machine, "_text");
424         if (err < 0)
425                 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
426                                                          psession, machine,
427                                                          "_stext");
428         if (err < 0)
429                 pr_err("Couldn't record guest kernel [%d]'s reference"
430                        " relocation symbol.\n", machine->pid);
431 }
432
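/*
 * Marker event written after each pass over all mmap buffers (when
 * tracepoints are being recorded) so that the report side can safely sort
 * and flush the events buffered up to this point in time.
 */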
433 static struct perf_event_header finished_round_event = {
434         .size = sizeof(struct perf_event_header),
435         .type = PERF_RECORD_FINISHED_ROUND,
436 };
437
438 static void mmap_read_all(void)
439 {
440         int i;
441
442         for (i = 0; i < evsel_list->nr_mmaps; i++) {
443                 if (evsel_list->mmap[i].base)
444                         mmap_read(&evsel_list->mmap[i]);
445         }
446
447         if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
448                 write_output(&finished_round_event, sizeof(finished_round_event));
449 }
450
451 static int __cmd_record(int argc, const char **argv)
452 {
453         struct stat st;
454         int flags;
455         int err;
456         unsigned long waking = 0;
457         int child_ready_pipe[2], go_pipe[2];
458         const bool forks = argc > 0;
459         char buf;
460         struct machine *machine;
461
462         progname = argv[0];
463
464         page_size = sysconf(_SC_PAGE_SIZE);
465
466         atexit(sig_atexit);
467         signal(SIGCHLD, sig_handler);
468         signal(SIGINT, sig_handler);
469         signal(SIGUSR1, sig_handler);
470
471         if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
472                 perror("failed to create pipes");
473                 exit(-1);
474         }
475
476         if (!output_name) {
477                 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
478                         pipe_output = 1;
479                 else
480                         output_name = "perf.data";
481         }
482         if (output_name) {
483                 if (!strcmp(output_name, "-"))
484                         pipe_output = 1;
485                 else if (!stat(output_name, &st) && st.st_size) {
486                         if (write_mode == WRITE_FORCE) {
487                                 char oldname[PATH_MAX];
488                                 snprintf(oldname, sizeof(oldname), "%s.old",
489                                          output_name);
490                                 unlink(oldname);
491                                 rename(output_name, oldname);
492                         }
493                 } else if (write_mode == WRITE_APPEND) {
494                         write_mode = WRITE_FORCE;
495                 }
496         }
497
498         flags = O_CREAT|O_RDWR;
499         if (write_mode == WRITE_APPEND)
500                 file_new = 0;
501         else
502                 flags |= O_TRUNC;
503
504         if (pipe_output)
505                 output = STDOUT_FILENO;
506         else
507                 output = open(output_name, flags, S_IRUSR | S_IWUSR);
508         if (output < 0) {
509                 perror("failed to create output file");
510                 exit(-1);
511         }
512
513         session = perf_session__new(output_name, O_WRONLY,
514                                     write_mode == WRITE_FORCE, false, NULL);
515         if (session == NULL) {
516                 pr_err("Not enough memory for reading perf file header\n");
517                 return -1;
518         }
519
520         if (!no_buildid)
521                 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
522
523         if (!file_new) {
524                 err = perf_session__read_header(session, output);
525                 if (err < 0)
526                         goto out_delete_session;
527         }
528
529         if (have_tracepoints(&evsel_list->entries))
530                 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
531
532         /* 512 KiB: default amount of unprivileged mlocked memory */
533         if (mmap_pages == UINT_MAX)
534                 mmap_pages = (512 * 1024) / page_size;
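        /* e.g. with 4 KiB pages this comes to 128 mmap data pages */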
535
536         if (forks) {
537                 child_pid = fork();
538                 if (child_pid < 0) {
539                         perror("failed to fork");
540                         exit(-1);
541                 }
542
543                 if (!child_pid) {
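                        /*
                         * When the data goes to stdout, send the workload's
                         * own stdout to stderr so it cannot corrupt the stream.
                         */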
544                         if (pipe_output)
545                                 dup2(2, 1);
546                         close(child_ready_pipe[0]);
547                         close(go_pipe[1]);
548                         fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
549
550                         /*
551                          * Do a dummy execvp to get the PLT entry resolved,
552                          * so we avoid the resolver overhead on the real
553                          * execvp call.
554                          */
555                         execvp("", (char **)argv);
556
557                         /*
558                          * Tell the parent we're ready to go
559                          */
560                         close(child_ready_pipe[1]);
561
562                         /*
563                          * Wait until the parent tells us to go.
564                          */
565                         if (read(go_pipe[0], &buf, 1) == -1)
566                                 perror("unable to read pipe");
567
568                         execvp(argv[0], (char **)argv);
569
570                         perror(argv[0]);
571                         kill(getppid(), SIGUSR1);
572                         exit(-1);
573                 }
574
575                 if (!system_wide && target_tid == -1 && target_pid == -1)
576                         evsel_list->threads->map[0] = child_pid;
577
578                 close(child_ready_pipe[1]);
579                 close(go_pipe[0]);
580                 /*
581                  * wait for child to settle
582                  */
583                 if (read(child_ready_pipe[0], &buf, 1) == -1) {
584                         perror("unable to read pipe");
585                         exit(-1);
586                 }
587                 close(child_ready_pipe[0]);
588         }
589
590         open_counters(evsel_list);
591
592         /*
593          * perf_session__delete(session) will be called at atexit_header()
594          */
595         atexit(atexit_header);
596
597         if (pipe_output) {
598                 err = perf_header__write_pipe(output);
599                 if (err < 0)
600                         return err;
601         } else if (file_new) {
602                 err = perf_session__write_header(session, evsel_list,
603                                                  output, false);
604                 if (err < 0)
605                         return err;
606         }
607
608         post_processing_offset = lseek(output, 0, SEEK_CUR);
609
610         if (pipe_output) {
611                 err = perf_session__synthesize_attrs(session,
612                                                      process_synthesized_event);
613                 if (err < 0) {
614                         pr_err("Couldn't synthesize attrs.\n");
615                         return err;
616                 }
617
618                 err = perf_event__synthesize_event_types(process_synthesized_event,
619                                                          session);
620                 if (err < 0) {
621                         pr_err("Couldn't synthesize event_types.\n");
622                         return err;
623                 }
624
625                 if (have_tracepoints(&evsel_list->entries)) {
626                         /*
627                          * FIXME: err <= 0 here actually means that
628                          * there were no tracepoints, so it's not really
629                          * an error, just that we don't need to
630                          * synthesize anything. We really should report
631                          * this more properly and also propagate the
632                          * errors that currently end up calling die().
633                          */
634                         err = perf_event__synthesize_tracing_data(output, evsel_list,
635                                                                   process_synthesized_event,
636                                                                   session);
637                         if (err <= 0) {
638                                 pr_err("Couldn't record tracing data.\n");
639                                 return err;
640                         }
641                         advance_output(err);
642                 }
643         }
644
645         machine = perf_session__find_host_machine(session);
646         if (!machine) {
647                 pr_err("Couldn't find native kernel information.\n");
648                 return -1;
649         }
650
651         err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
652                                                  session, machine, "_text");
653         if (err < 0)
654                 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
655                                                          session, machine, "_stext");
656         if (err < 0)
657                 pr_err("Couldn't record kernel reference relocation symbol\n"
658                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
659                        "Check /proc/kallsyms permission or run as root.\n");
660
661         err = perf_event__synthesize_modules(process_synthesized_event,
662                                              session, machine);
663         if (err < 0)
664                 pr_err("Couldn't record kernel module information.\n"
665                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
666                        "Check /proc/modules permission or run as root.\n");
667
668         if (perf_guest)
669                 perf_session__process_machines(session,
670                                                perf_event__synthesize_guest_os);
671
672         if (!system_wide)
673                 perf_event__synthesize_thread_map(evsel_list->threads,
674                                                   process_synthesized_event,
675                                                   session);
676         else
677                 perf_event__synthesize_threads(process_synthesized_event,
678                                                session);
679
680         if (realtime_prio) {
681                 struct sched_param param;
682
683                 param.sched_priority = realtime_prio;
684                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
685                         pr_err("Could not set realtime priority.\n");
686                         exit(-1);
687                 }
688         }
689
690         perf_evlist__enable(evsel_list);
691
692         /*
693          * Let the child rip
694          */
695         if (forks)
696                 close(go_pipe[1]);
697
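        /*
         * Main capture loop: flush every mmap buffer, and if nothing new
         * arrived either stop (when a signal or child exit set 'done') or
         * poll() until the kernel wakes us up.  Once 'done' is set the
         * counters are disabled and the loop keeps draining until a pass
         * finds nothing new.
         */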
698         for (;;) {
699                 int hits = samples;
700
701                 mmap_read_all();
702
703                 if (hits == samples) {
704                         if (done)
705                                 break;
706                         err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
707                         waking++;
708                 }
709
710                 if (done)
711                         perf_evlist__disable(evsel_list);
712         }
713
714         if (quiet || signr == SIGUSR1)
715                 return 0;
716
717         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
718
719         /*
720          * Approximate RIP event size: 24 bytes.
721          */
722         fprintf(stderr,
723                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
724                 (double)bytes_written / 1024.0 / 1024.0,
725                 output_name,
726                 bytes_written / 24);
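        /* e.g. ~24 MB of data works out to roughly a million samples */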
727
728         return 0;
729
730 out_delete_session:
731         perf_session__delete(session);
732         return err;
733 }
734
735 static const char * const record_usage[] = {
736         "perf record [<options>] [<command>]",
737         "perf record [<options>] -- <command> [<options>]",
738         NULL
739 };
740
741 static bool force, append_file;
742
743 const struct option record_options[] = {
744         OPT_CALLBACK('e', "event", &evsel_list, "event",
745                      "event selector. use 'perf list' to list available events",
746                      parse_events_option),
747         OPT_CALLBACK(0, "filter", &evsel_list, "filter",
748                      "event filter", parse_filter),
749         OPT_INTEGER('p', "pid", &target_pid,
750                     "record events on existing process id"),
751         OPT_INTEGER('t', "tid", &target_tid,
752                     "record events on existing thread id"),
753         OPT_INTEGER('r', "realtime", &realtime_prio,
754                     "collect data with this RT SCHED_FIFO priority"),
755         OPT_BOOLEAN('D', "no-delay", &nodelay,
756                     "collect data without buffering"),
757         OPT_BOOLEAN('R', "raw-samples", &raw_samples,
758                     "collect raw sample records from all opened counters"),
759         OPT_BOOLEAN('a', "all-cpus", &system_wide,
760                             "system-wide collection from all CPUs"),
761         OPT_BOOLEAN('A', "append", &append_file,
762                             "append to the output file to do incremental profiling"),
763         OPT_STRING('C', "cpu", &cpu_list, "cpu",
764                     "list of cpus to monitor"),
765         OPT_BOOLEAN('f', "force", &force,
766                         "overwrite existing data file (deprecated)"),
767         OPT_U64('c', "count", &user_interval, "event period to sample"),
768         OPT_STRING('o', "output", &output_name, "file",
769                     "output file name"),
770         OPT_BOOLEAN('i', "no-inherit", &no_inherit,
771                     "child tasks do not inherit counters"),
772         OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
773         OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
774         OPT_BOOLEAN(0, "group", &group,
775                     "put the counters into a counter group"),
776         OPT_BOOLEAN('g', "call-graph", &call_graph,
777                     "do call-graph (stack chain/backtrace) recording"),
778         OPT_INCR('v', "verbose", &verbose,
779                     "be more verbose (show counter open errors, etc)"),
780         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
781         OPT_BOOLEAN('s', "stat", &inherit_stat,
782                     "per thread counts"),
783         OPT_BOOLEAN('d', "data", &sample_address,
784                     "Sample addresses"),
785         OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
786         OPT_BOOLEAN('n', "no-samples", &no_samples,
787                     "don't sample"),
788         OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
789                     "do not update the buildid cache"),
790         OPT_BOOLEAN('B', "no-buildid", &no_buildid,
791                     "do not collect buildids in perf.data"),
792         OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
793                      "monitor event in cgroup name only",
794                      parse_cgroups),
795         OPT_END()
796 };
797
798 int cmd_record(int argc, const char **argv, const char *prefix __used)
799 {
800         int err = -ENOMEM;
801         struct perf_evsel *pos;
802
803         evsel_list = perf_evlist__new(NULL, NULL);
804         if (evsel_list == NULL)
805                 return -ENOMEM;
806
807         argc = parse_options(argc, argv, record_options, record_usage,
808                             PARSE_OPT_STOP_AT_NON_OPTION);
809         if (!argc && target_pid == -1 && target_tid == -1 &&
810                 !system_wide && !cpu_list)
811                 usage_with_options(record_usage, record_options);
812
813         if (force && append_file) {
814                 fprintf(stderr, "Can't overwrite and append at the same time."
815                                 " You need to choose between -f and -A.\n");
816                 usage_with_options(record_usage, record_options);
817         } else if (append_file) {
818                 write_mode = WRITE_APPEND;
819         } else {
820                 write_mode = WRITE_FORCE;
821         }
822
823         if (nr_cgroups && !system_wide) {
824                 fprintf(stderr, "cgroup monitoring only available in"
825                         " system-wide mode\n");
826                 usage_with_options(record_usage, record_options);
827         }
828
829         symbol__init();
830
831         if (symbol_conf.kptr_restrict)
832                 pr_warning(
833 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
834 "check /proc/sys/kernel/kptr_restrict.\n\n"
835 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
836 "file is not found in the buildid cache or in the vmlinux path.\n\n"
837 "Samples in kernel modules won't be resolved at all.\n\n"
838 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
839 "even with a suitable vmlinux or kallsyms file.\n\n");
840
841         if (no_buildid_cache || no_buildid)
842                 disable_buildid_cache();
843
844         if (evsel_list->nr_entries == 0 &&
845             perf_evlist__add_default(evsel_list) < 0) {
846                 pr_err("Not enough memory for event selector list\n");
847                 goto out_symbol_exit;
848         }
849
850         if (target_pid != -1)
851                 target_tid = target_pid;
852
853         if (perf_evlist__create_maps(evsel_list, target_pid,
854                                      target_tid, cpu_list) < 0)
855                 usage_with_options(record_usage, record_options);
856
857         list_for_each_entry(pos, &evsel_list->entries, node) {
858                 if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
859                                          evsel_list->threads->nr) < 0)
860                         goto out_free_fd;
861                 if (perf_header__push_event(pos->attr.config, event_name(pos)))
862                         goto out_free_fd;
863         }
864
865         if (perf_evlist__alloc_pollfd(evsel_list) < 0)
866                 goto out_free_fd;
867
868         if (user_interval != ULLONG_MAX)
869                 default_interval = user_interval;
870         if (user_freq != UINT_MAX)
871                 freq = user_freq;
872
873         /*
874          * User specified count overrides default frequency.
875          */
876         if (default_interval)
877                 freq = 0;
878         else if (freq) {
879                 default_interval = freq;
880         } else {
881                 fprintf(stderr, "frequency and count are zero, aborting\n");
882                 err = -EINVAL;
883                 goto out_free_fd;
884         }
885
886         err = __cmd_record(argc, argv);
887 out_free_fd:
888         perf_evlist__delete_maps(evsel_list);
889 out_symbol_exit:
890         symbol__exit();
891         return err;
892 }