From: Ingo Molnar Date: Thu, 16 Mar 2017 16:29:23 +0000 (+0100) Subject: Merge tag 'perf-core-for-mingo-4.12-20170316' of git://git.kernel.org/pub/scm/linux... X-Git-Tag: v4.12-rc1~152^2~36 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=61f63e383784bd0ab6529cfc95ddc59c713afcc9;hp=ee368428aac96d94a9804b9109a81355451c3cd9;p=karo-tx-linux.git Merge tag 'perf-core-for-mingo-4.12-20170316' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: New features: - Add 'brstackinsn' field in 'perf script' to reuse the x86 instruction decoder used in the Intel PT code to study hot paths to samples (Andi Kleen) Kernel changes: - Default UPROBES_EVENTS to Y (Alexei Starovoitov) - Fix check for kretprobe offset within function entry (Naveen N. Rao) Infrastructure changes: - Introduce util func is_sdt_event() (Ravi Bangoria) - Make perf_event__synthesize_mmap_events() scale on older kernels where reading /proc/pid/maps is way slower than reading /proc/pid/task/pid/maps (Stephane Eranian) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 177bdf6c6aeb..47e4da5b4fa2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -268,6 +268,7 @@ extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern bool arch_function_offset_within_entry(unsigned long offset); +extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 4780ec236035..d733479a10ee 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr) * This returns encoded errors if it fails 
to look up symbol or invalid * combination of parameters. */ -static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, + const char *symbol_name, unsigned int offset) { - kprobe_opcode_t *addr = p->addr; - - if ((p->symbol_name && p->addr) || - (!p->symbol_name && !p->addr)) + if ((symbol_name && addr) || (!symbol_name && !addr)) goto invalid; - if (p->symbol_name) { - kprobe_lookup_name(p->symbol_name, addr); + if (symbol_name) { + kprobe_lookup_name(symbol_name, addr); if (!addr) return ERR_PTR(-ENOENT); } - addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); + addr = (kprobe_opcode_t *)(((char *)addr) + offset); if (addr) return addr; @@ -1413,6 +1411,11 @@ invalid: return ERR_PTR(-EINVAL); } +static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +{ + return _kprobe_addr(p->addr, p->symbol_name, p->offset); +} + /* Check passed kprobe is valid and return kprobe in kprobe_table. */ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { @@ -1881,19 +1884,28 @@ bool __weak arch_function_offset_within_entry(unsigned long offset) return !offset; } +bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) +{ + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); + + if (IS_ERR(kp_addr)) + return false; + + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) || + !arch_function_offset_within_entry(offset)) + return false; + + return true; +} + int register_kretprobe(struct kretprobe *rp) { int ret = 0; struct kretprobe_instance *inst; int i; void *addr; - unsigned long offset; - - addr = kprobe_addr(&rp->kp); - if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset)) - return -EINVAL; - if (!arch_function_offset_within_entry(offset)) + if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) return -EINVAL; if (kretprobe_blacklist_size) { diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig 
index d4a06e714645..9619b5768e4b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -455,7 +455,7 @@ config UPROBE_EVENTS select UPROBES select PROBE_EVENTS select TRACING - default n + default y help This allows the user to add tracing events on top of userspace dynamic events (similar to tracepoints) on the fly via the trace diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 12fb540da0e5..013f4e7146d4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -697,7 +697,7 @@ static int create_trace_kprobe(int argc, char **argv) return ret; } if (offset && is_return && - !arch_function_offset_within_entry(offset)) { + !function_offset_within_entry(NULL, symbol, offset)) { pr_info("Given offset is not valid for return probe.\n"); return -EINVAL; } diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 293149a1c6a1..4e7772387c6e 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ @@ -186,7 +186,7 @@ * * Reuse free bits when adding new feature flags! 
*/ - +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ @@ -321,5 +321,4 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ - #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 62c9b0c77a3a..cb0eda3925e6 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,7 +116,7 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -189,15 +189,20 @@ OPTIONS i.e., -F "" is not allowed. 
The brstack output includes branch related information with raw addresses using the - /v/v/v/v/ syntax in the following order: + /v/v/v/v/cycles syntax in the following order: FROM: branch source instruction TO : branch target instruction M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported X/- : X=branch inside a transactional region, -=not in transaction region or not supported A/- : A=TSX abort entry, -=not aborted region or not supported + cycles The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible. + When brstackinsn is specified the full assembler sequences of branch sequences for each sample + are printed. This is the full execution path leading to the sample. This is only supported when the + sample was recorded with perf record -b or -j any. + -k:: --vmlinux=:: vmlinux pathname @@ -302,6 +307,10 @@ include::itrace.txt[] stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. +--max-blocks:: + Set the maximum number of program blocks to print with brstackinsn for + each sample. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 66d62c98dff9..c98e16689b57 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -28,6 +28,7 @@ #include #include "asm/bug.h" #include "util/mem-events.h" +#include "util/dump-insn.h" static char const *script_name; static char const *generate_script_lang; @@ -42,6 +43,7 @@ static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; +static int max_blocks; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -69,6 +71,7 @@ enum perf_output_field { PERF_OUTPUT_CALLINDENT = 1U << 20, PERF_OUTPUT_INSN = 1U << 21, PERF_OUTPUT_INSNLEN = 1U << 22, + PERF_OUTPUT_BRSTACKINSN = 1U << 23, }; struct output_option { @@ -98,6 +101,7 @@ struct output_option { {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, {.str = "insn", .field = PERF_OUTPUT_INSN}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, + {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, }; /* default set to maintain compatibility with current format */ @@ -292,7 +296,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected. 
Hence, no address to lookup the source line number.\n"); return -EINVAL; } - + if (PRINT_FIELD(BRSTACKINSN) && + !(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) { + pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" + "Hint: run 'perf record -b ...'\n"); + return -EINVAL; + } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID)) @@ -546,6 +556,233 @@ static void print_sample_brstacksym(struct perf_sample *sample, } } +#define MAXBB 16384UL + +static int grab_bb(u8 *buffer, u64 start, u64 end, + struct machine *machine, struct thread *thread, + bool *is64bit, u8 *cpumode, bool last) +{ + long offset, len; + struct addr_location al; + bool kernel; + + if (!start || !end) + return 0; + + kernel = machine__kernel_ip(machine, start); + if (kernel) + *cpumode = PERF_RECORD_MISC_KERNEL; + else + *cpumode = PERF_RECORD_MISC_USER; + + /* + * Block overlaps between kernel and user. + * This can happen due to ring filtering + * On Intel CPUs the entry into the kernel is filtered, + * but the exit is not. Let the caller patch it up. 
+ */ + if (kernel != machine__kernel_ip(machine, end)) { + printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", + start, end); + return -ENXIO; + } + + memset(&al, 0, sizeof(al)); + if (end - start > MAXBB - MAXINSN) { + if (last) + printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); + else + printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); + return 0; + } + + thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); + if (!al.map || !al.map->dso) { + printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + return 0; + } + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) { + printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + return 0; + } + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map); + + offset = al.map->map_ip(al.map, start); + len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer, + end - start + MAXINSN); + + *is64bit = al.map->dso->is_64_bit; + if (len <= 0) + printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", + start, end); + return len; +} + +static void print_jump(uint64_t ip, struct branch_entry *en, + struct perf_insn *x, u8 *inbuf, int len, + int insn) +{ + printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", + ip, + dump_insn(x, ip, inbuf, len, NULL), + en->flags.predicted ? " PRED" : "", + en->flags.mispred ? " MISPRED" : "", + en->flags.in_tx ? " INTX" : "", + en->flags.abort ? 
" ABORT" : ""); + if (en->flags.cycles) { + printf(" %d cycles", en->flags.cycles); + if (insn) + printf(" %.2f IPC", (float)insn / en->flags.cycles); + } + putchar('\n'); +} + +static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, + uint64_t addr, struct symbol **lastsym, + struct perf_event_attr *attr) +{ + struct addr_location al; + int off; + + memset(&al, 0, sizeof(al)); + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); + if (!al.map) + thread__find_addr_map(thread, cpumode, MAP__VARIABLE, + addr, &al); + if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) + return; + + al.cpu = cpu; + al.sym = NULL; + if (al.map) + al.sym = map__find_symbol(al.map, al.addr); + + if (!al.sym) + return; + + if (al.addr < al.sym->end) + off = al.addr - al.sym->start; + else + off = al.addr - al.map->start - al.sym->start; + printf("\t%s", al.sym->name); + if (off) + printf("%+d", off); + putchar(':'); + if (PRINT_FIELD(SRCLINE)) + map__fprintf_srcline(al.map, al.addr, "\t", stdout); + putchar('\n'); + *lastsym = al.sym; +} + +static void print_sample_brstackinsn(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, + struct machine *machine) +{ + struct branch_stack *br = sample->branch_stack; + u64 start, end; + int i, insn, len, nr, ilen; + struct perf_insn x; + u8 buffer[MAXBB]; + unsigned off; + struct symbol *lastsym = NULL; + + if (!(br && br->nr)) + return; + nr = br->nr; + if (max_blocks && nr > max_blocks + 1) + nr = max_blocks + 1; + + x.thread = thread; + x.cpu = sample->cpu; + + putchar('\n'); + + /* Handle first from jump, of which we don't know the entry. 
*/ + len = grab_bb(buffer, br->entries[nr-1].from, + br->entries[nr-1].from, + machine, thread, &x.is64bit, &x.cpumode, false); + if (len > 0) { + print_ip_sym(thread, x.cpumode, x.cpu, + br->entries[nr - 1].from, &lastsym, attr); + print_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + &x, buffer, len, 0); + } + + /* Print all blocks */ + for (i = nr - 2; i >= 0; i--) { + if (br->entries[i].from || br->entries[i].to) + pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, + br->entries[i].from, + br->entries[i].to); + start = br->entries[i + 1].to; + end = br->entries[i].from; + + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); + /* Patch up missing kernel transfers due to ring filters */ + if (len == -ENXIO && i > 0) { + end = br->entries[--i].from; + pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); + } + if (len <= 0) + continue; + + insn = 0; + for (off = 0;; off += ilen) { + uint64_t ip = start + off; + + print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr); + if (ip == end) { + print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn); + break; + } else { + printf("\t%016" PRIx64 "\t%s\n", ip, + dump_insn(&x, ip, buffer + off, len - off, &ilen)); + if (ilen == 0) + break; + insn++; + } + } + } + + /* + * Hit the branch? In this case we are already done, and the target + * has not been executed yet. 
+ */ + if (br->entries[0].from == sample->ip) + return; + if (br->entries[0].flags.abort) + return; + + /* + * Print final block upto sample + */ + start = br->entries[0].to; + end = sample->ip; + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true); + print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr); + if (len <= 0) { + /* Print at least last IP if basic block did not work */ + len = grab_bb(buffer, sample->ip, sample->ip, + machine, thread, &x.is64bit, &x.cpumode, false); + if (len <= 0) + return; + + printf("\t%016" PRIx64 "\t%s\n", sample->ip, + dump_insn(&x, sample->ip, buffer, len, NULL)); + return; + } + for (off = 0; off <= end - start; off += ilen) { + printf("\t%016" PRIx64 "\t%s\n", start + off, + dump_insn(&x, start + off, buffer + off, len - off, &ilen)); + if (ilen == 0) + break; + } +} static void print_sample_addr(struct perf_sample *sample, struct thread *thread, @@ -632,7 +869,9 @@ static void print_sample_callindent(struct perf_sample *sample, } static void print_insn(struct perf_sample *sample, - struct perf_event_attr *attr) + struct perf_event_attr *attr, + struct thread *thread, + struct machine *machine) { if (PRINT_FIELD(INSNLEN)) printf(" ilen: %d", sample->insn_len); @@ -643,12 +882,15 @@ static void print_insn(struct perf_sample *sample, for (i = 0; i < sample->insn_len; i++) printf(" %02x", (unsigned char)sample->insn[i]); } + if (PRINT_FIELD(BRSTACKINSN)) + print_sample_brstackinsn(sample, thread, attr, machine); } static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, - struct addr_location *al) + struct addr_location *al, + struct machine *machine) { struct perf_event_attr *attr = &evsel->attr; bool print_srcline_last = false; @@ -689,7 +931,7 @@ static void print_sample_bts(struct perf_sample *sample, if (print_srcline_last) map__fprintf_srcline(al->map, al->addr, "\n ", stdout); - print_insn(sample, attr); + print_insn(sample, attr, thread, 
machine); printf("\n"); } @@ -872,7 +1114,8 @@ static size_t data_src__printf(u64 data_src) static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, - struct addr_location *al) + struct addr_location *al, + struct machine *machine) { struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; @@ -899,7 +1142,7 @@ static void process_event(struct perf_script *script, print_sample_flags(sample->flags); if (is_bts_event(attr)) { - print_sample_bts(sample, evsel, thread, al); + print_sample_bts(sample, evsel, thread, al, machine); return; } @@ -937,7 +1180,7 @@ static void process_event(struct perf_script *script, if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); - print_insn(sample, attr); + print_insn(sample, attr, thread, machine); printf("\n"); } @@ -1047,7 +1290,7 @@ static int process_sample_event(struct perf_tool *tool, if (scripting_ops) scripting_ops->process_event(event, sample, evsel, &al); else - process_event(scr, sample, evsel, &al); + process_event(scr, sample, evsel, &al, machine); out_put: addr_location__put(&al); @@ -2191,7 +2434,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Valid types: hw,sw,trace,raw. 
" "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,period,iregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen", parse_output_fields), + "bpf-output,callindent,insn,insnlen,brstackinsn", + parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -2222,6 +2466,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, "Show namespace events (if recorded)"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), + OPT_INTEGER(0, "max-blocks", &max_blocks, + "Maximum number of code blocks to dump with brstackinsn"), OPT_BOOLEAN(0, "ns", &nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2ea5ee179a3b..fb4f42f1bb38 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -82,6 +82,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o +libperf-y += dump-insn.o libperf-y += parse-regs-options.o libperf-y += term.o libperf-y += help-unknown-cmd.o diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c new file mode 100644 index 000000000000..ffbdb19f05d0 --- /dev/null +++ b/tools/perf/util/dump-insn.c @@ -0,0 +1,14 @@ +#include +#include "dump-insn.h" + +/* Fallback code */ + +__weak +const char *dump_insn(struct perf_insn *x __maybe_unused, + u64 ip __maybe_unused, u8 *inbuf __maybe_unused, + int inlen __maybe_unused, int *lenp) +{ + if (lenp) + *lenp = 0; + return "?"; +} diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h new file mode 100644 index 000000000000..90fb115981cf --- /dev/null +++ 
b/tools/perf/util/dump-insn.h @@ -0,0 +1,22 @@ +#ifndef __PERF_DUMP_INSN_H +#define __PERF_DUMP_INSN_H 1 + +#define MAXINSN 15 + +#include + +struct thread; + +struct perf_insn { + /* Initialized by callers: */ + struct thread *thread; + u8 cpumode; + bool is64bit; + int cpu; + /* Temporary */ + char out[256]; +}; + +const char *dump_insn(struct perf_insn *x, u64 ip, + u8 *inbuf, int inlen, int *lenp); +#endif diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index d082cb70445d..33fc2e9c0b0c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -325,8 +325,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (machine__is_default_guest(machine)) return 0; - snprintf(filename, sizeof(filename), "%s/proc/%d/maps", - machine->root_dir, pid); + snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); fp = fopen(filename, "r"); if (fp == NULL) { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 459352a9978f..54818828023b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -26,6 +26,7 @@ #include "insn.c" #include "intel-pt-insn-decoder.h" +#include "dump-insn.h" #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN #error Instruction buffer size too small @@ -179,6 +180,29 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, return 0; } +const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused, + u8 *inbuf, int inlen, int *lenp) +{ + struct insn insn; + int n, i; + int left; + + insn_init(&insn, inbuf, inlen, x->is64bit); + insn_get_length(&insn); + if (!insn_complete(&insn) || insn.length > inlen) + return ""; + if (lenp) + *lenp = insn.length; + left = sizeof(x->out); + n = snprintf(x->out, left, "insn: "); + left -= n; + for (i = 0; i < insn.length; i++) { + n 
+= snprintf(x->out + n, left, "%02x ", inbuf[i]); + left -= n; + } + return x->out; +} + const char *branch_name[] = { [INTEL_PT_OP_OTHER] = "Other", [INTEL_PT_OP_CALL] = "Call", diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1af6a267c21b..8c72b0ff7fcb 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -8,6 +8,7 @@ #include #include #include +#include struct list_head; struct perf_evsel; @@ -196,4 +197,23 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); +#ifdef HAVE_LIBELF_SUPPORT +/* + * If the probe point starts with '%', + * or starts with "sdt_" and has a ':' but no '=', + * then it should be a SDT/cached probe point. + */ +static inline bool is_sdt_event(char *str) +{ + return (str[0] == '%' || + (!strncmp(str, "sdt_", 4) && + !!strchr(str, ':') && !strchr(str, '='))); +} +#else +static inline bool is_sdt_event(char *str __maybe_unused) +{ + return false; +} +#endif /* HAVE_LIBELF_SUPPORT */ + #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c9bdc9ded0c3..b19d17801beb 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1341,14 +1341,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (!arg) return -EINVAL; - /* - * If the probe point starts with '%', - * or starts with "sdt_" and has a ':' but no '=', - * then it should be a SDT/cached probe point. - */ - if (arg[0] == '%' || - (!strncmp(arg, "sdt_", 4) && - !!strchr(arg, ':') && !strchr(arg, '='))) { + if (is_sdt_event(arg)) { pev->sdt = true; if (arg[0] == '%') arg++;