From 4ba1faa19fa5f415bd69b1d7c366028332468bca Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 10 Jul 2015 07:36:10 +0000 Subject: [PATCH] perf record: Allow filtering perf's pid via --exclude-perf This patch allows 'perf record' to exclude events issued by perf itself by '--exclude-perf' option. Before this patch, when doing something like: # perf record -a -e syscalls:sys_enter_write One could easily get result like this: # /tmp/perf report --stdio ... # Overhead Command Shared Object Symbol # ........ ....... .................. .................... # 99.99% perf libpthread-2.18.so [.] __write_nocancel 0.01% ls libc-2.18.so [.] write 0.01% sshd libc-2.18.so [.] write ... Where most events are generated by perf itself. A shell trick can be done to filter perf itself out: # cat << EOF > ./tmp > #!/bin/sh > exec perf record -e ... --filter="common_pid != \$\$" -a sleep 10 > EOF # chmod a+x ./tmp # ./tmp However, doing so is user unfriendly. This patch extracts evsel iteration framework introduced by patch 'perf record: Apply filter to all events in a glob matching' into foreach_evsel_in_last_glob(), and makes exclude_perf() function append new filter expression to each evsel selected by a '-e' selector. To avoid losing filters if user pass '--filter' after '--exclude-perf', this patch uses perf_evsel__append_filter() in both case, instead of perf_evsel__set_filter() which removes old filter. As a side effect, now it is possible to use multiple '--filter' option for one selector. They are combinded with '&&'. Signed-off-by: Wang Nan Cc: Andi Kleen Cc: Brendan Gregg Cc: Steven Rostedt Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1436513770-8896-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 11 +++- tools/perf/builtin-record.c | 3 + tools/perf/util/parse-events.c | 83 ++++++++++++++++++++---- tools/perf/util/parse-events.h | 1 + 4 files changed, 84 insertions(+), 14 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5b47b2c88223..29e5307945bf 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -69,7 +69,16 @@ OPTIONS "perf report" to view group events together. --filter=:: - Event filter. + Event filter. This option should follow a event selector (-e) which + selects tracepoint event(s). Multiple '--filter' options are combined + using '&&'. + +--exclude-perf:: + Don't record events issued by perf itself. This option should follow + a event selector (-e) which selects tracepoint event(s). It adds a + filter expression 'common_pid != $PERFPID' to filters. If other + '--filter' exists, the new filter expression will be combined with + them by '&&'. -a:: --all-cpus:: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 283fe96bdfc1..1932e27c00d8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -992,6 +992,9 @@ struct option __record_options[] = { parse_events_option), OPT_CALLBACK(0, "filter", &record.evlist, "filter", "event filter", parse_filter), + OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, + NULL, "don't record events from perf itself", + exclude_perf), OPT_STRING('p', "pid", &record.opts.target.pid, "pid", "record events on existing process id"), OPT_STRING('t', "tid", &record.opts.target.tid, "tid", diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index bbb7fbc2857e..4f807fc1b14a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1167,27 +1167,24 @@ int parse_events_option(const struct option *opt, const char *str, return ret; } -int parse_filter(const struct option *opt, const char *str, - int unset __maybe_unused) +static int +foreach_evsel_in_last_glob(struct perf_evlist *evlist, + int (*func)(struct perf_evsel *evsel, + const void *arg), + const void *arg) { - struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; struct perf_evsel *last = NULL; + int err; if (evlist->nr_entries > 0) last = perf_evlist__last(evlist); do { - if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { - fprintf(stderr, - "--filter option should follow a -e tracepoint option\n"); - return -1; - } - - if (perf_evsel__set_filter(last, str) < 0) { - fprintf(stderr, - "not enough memory to hold filter string\n"); + err = (*func)(last, arg); + if (err) return -1; - } + if (!last) + return 0; if (last->node.prev == &evlist->entries) return 0; @@ -1197,6 +1194,66 @@ int parse_filter(const struct option *opt, const char *str, return 0; } +static int set_filter(struct perf_evsel *evsel, const void *arg) +{ + const char *str = arg; + + if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { + fprintf(stderr, + "--filter option should follow a -e tracepoint option\n"); + return -1; + } + + if (perf_evsel__append_filter(evsel, "&&", str) < 0) { + fprintf(stderr, + "not enough memory to hold filter string\n"); + return -1; + } + + return 0; +} + +int parse_filter(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + + return foreach_evsel_in_last_glob(evlist, set_filter, + (const void *)str); +} + +static int add_exclude_perf_filter(struct perf_evsel *evsel, + const void *arg __maybe_unused) +{ + char new_filter[64]; + + if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { + fprintf(stderr, + "--exclude-perf option should follow a -e tracepoint option\n"); + return -1; + } + + snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); + + if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) { + fprintf(stderr, + "not enough memory to hold filter string\n"); + return -1; + } + + return 0; +} + +int exclude_perf(const struct option *opt, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + + return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter, + NULL); +} + static const char * const event_type_descriptors[] = { "Hardware event", "Software event", diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 131f29b2f132..2063048a4354 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -34,6 +34,7 @@ extern int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *error); extern int parse_events_terms(struct list_head *terms, const char *str); extern int parse_filter(const struct option *opt, const char *str, int unset); +extern int exclude_perf(const struct option *opt, const char *arg, int unset); #define EVENTS_HELP_MAX (128*1024) -- 2.39.5