Merge commit 'v2.6.37-rc8' into perf/core
author    Ingo Molnar <mingo@elte.hu>
          Tue, 4 Jan 2011 07:08:51 +0000 (08:08 +0100)
committer Ingo Molnar <mingo@elte.hu>
          Tue, 4 Jan 2011 07:08:54 +0000 (08:08 +0100)
Merge reason: pick up latest -rc.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
184 files changed:
CREDITS
Documentation/kernel-parameters.txt
MAINTAINERS
arch/alpha/include/asm/perf_event.h
arch/alpha/kernel/irq_alpha.c
arch/alpha/kernel/perf_event.c
arch/arm/kernel/perf_event.c
arch/mips/kernel/perf_event_mipsxx.c
arch/powerpc/kernel/e500-pmu.c
arch/powerpc/kernel/mpc7450-pmu.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/powerpc/kernel/power4-pmu.c
arch/powerpc/kernel/power5+-pmu.c
arch/powerpc/kernel/power5-pmu.c
arch/powerpc/kernel/power6-pmu.c
arch/powerpc/kernel/power7-pmu.c
arch/powerpc/kernel/ppc970-pmu.c
arch/sh/kernel/cpu/sh4/perf_event.c
arch/sh/kernel/cpu/sh4a/perf_event.c
arch/sh/kernel/perf_event.c
arch/sparc/include/asm/perf_event.h
arch/sparc/kernel/nmi.c
arch/sparc/kernel/perf_event.c
arch/x86/include/asm/alternative.h
arch/x86/include/asm/irq.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nmi.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/perf_event_p4.h
arch/x86/include/asm/smpboot_hooks.h
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/timer.h
arch/x86/kernel/alternative.c
arch/x86/kernel/apic/Makefile
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/hw_nmi.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/nmi.c [deleted file]
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_amd.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perfctr-watchdog.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/kprobes.c
arch/x86/kernel/process.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/stacktrace.c
arch/x86/kernel/time.c
arch/x86/kernel/traps.c
arch/x86/mm/kmemcheck/error.c
arch/x86/oprofile/backtrace.c
arch/x86/oprofile/nmi_int.c
arch/x86/oprofile/nmi_timer_int.c
arch/x86/oprofile/op_model_amd.c
arch/x86/oprofile/op_model_p4.c
drivers/acpi/acpica/nsinit.c
drivers/watchdog/hpwdt.c
include/linux/ftrace_event.h
include/linux/kprobes.h
include/linux/nmi.h
include/linux/perf_event.h
include/linux/sched.h
include/linux/stacktrace.h
include/linux/syscalls.h
include/linux/tracepoint.h
include/trace/define_trace.h
include/trace/events/syscalls.h
include/trace/ftrace.h
init/main.c
kernel/hw_breakpoint.c
kernel/kprobes.c
kernel/perf_event.c
kernel/sched.c
kernel/sysctl.c
kernel/sysctl_binary.c
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_export.c
kernel/watchdog.c
lib/Kconfig.debug
scripts/Makefile.build
tools/perf/Documentation/perf-annotate.txt
tools/perf/Documentation/perf-buildid-list.txt
tools/perf/Documentation/perf-diff.txt
tools/perf/Documentation/perf-kvm.txt
tools/perf/Documentation/perf-lock.txt
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-sched.txt
tools/perf/Documentation/perf-script-perl.txt [moved from tools/perf/Documentation/perf-trace-perl.txt with 90% similarity]
tools/perf/Documentation/perf-script-python.txt [moved from tools/perf/Documentation/perf-trace-python.txt with 89% similarity]
tools/perf/Documentation/perf-script.txt [moved from tools/perf/Documentation/perf-trace.txt with 62% similarity]
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-test.txt
tools/perf/Documentation/perf-timechart.txt
tools/perf/Documentation/perf-top.txt
tools/perf/MANIFEST
tools/perf/Makefile
tools/perf/bench/mem-memcpy-arch.h [new file with mode: 0644]
tools/perf/bench/mem-memcpy-x86-64-asm-def.h [new file with mode: 0644]
tools/perf/bench/mem-memcpy-x86-64-asm.S [new file with mode: 0644]
tools/perf/bench/mem-memcpy.c
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-diff.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-lock.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c [moved from tools/perf/builtin-trace.c with 83% similarity]
tools/perf/builtin-stat.c
tools/perf/builtin-test.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin.h
tools/perf/command-list.txt
tools/perf/feature-tests.mak
tools/perf/perf.c
tools/perf/scripts/perl/Perf-Trace-Util/Context.c
tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
tools/perf/scripts/perl/Perf-Trace-Util/README
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
tools/perf/scripts/perl/bin/failed-syscalls-report
tools/perf/scripts/perl/bin/rw-by-file-report
tools/perf/scripts/perl/bin/rw-by-pid-report
tools/perf/scripts/perl/bin/rwtop-report
tools/perf/scripts/perl/bin/wakeup-latency-report
tools/perf/scripts/perl/bin/workqueue-stats-report
tools/perf/scripts/perl/check-perf-trace.pl
tools/perf/scripts/perl/rw-by-file.pl
tools/perf/scripts/perl/workqueue-stats.pl
tools/perf/scripts/python/Perf-Trace-Util/Context.c
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
tools/perf/scripts/python/bin/futex-contention-report
tools/perf/scripts/python/bin/netdev-times-report
tools/perf/scripts/python/bin/sched-migration-report
tools/perf/scripts/python/bin/sctop-report
tools/perf/scripts/python/bin/syscall-counts-by-pid-report
tools/perf/scripts/python/bin/syscall-counts-report
tools/perf/scripts/python/check-perf-trace.py
tools/perf/scripts/python/failed-syscalls-by-pid.py
tools/perf/scripts/python/sched-migration.py
tools/perf/scripts/python/sctop.py
tools/perf/scripts/python/syscall-counts-by-pid.py
tools/perf/scripts/python/syscall-counts.py
tools/perf/util/build-id.c
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/include/asm/cpufeature.h [new file with mode: 0644]
tools/perf/util/include/asm/dwarf2.h [new file with mode: 0644]
tools/perf/util/include/linux/bitops.h
tools/perf/util/include/linux/linkage.h [new file with mode: 0644]
tools/perf/util/parse-events.c
tools/perf/util/parse-options.h
tools/perf/util/probe-event.c
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/sort.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/ui/util.c

diff --git a/CREDITS b/CREDITS
index 41d8e63d5165b5b786db6ab7d8c14fbc49fc0107..494b6e4746d7b9d08f0334bab61651341d285068 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2365,8 +2365,6 @@ E: acme@redhat.com
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD  841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: R. Brasílio Itiberê, 4270/1010 - Água Verde
-S: 80240-060 - Curitiba - Paraná
 S: Brazil
 
 N: Karsten Merker
index 8b61c93609994dd91e36c25e1b29647ad084eaff..316c723a950c52d9bad3537ea8b15c1cabff2e5b 100644 (file)
@@ -1579,20 +1579,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
        nmi_watchdog=   [KNL,BUGS=X86] Debugging features for SMP kernels
                        Format: [panic,][num]
-                       Valid num: 0,1,2
+                       Valid num: 0
                        0 - turn nmi_watchdog off
-                       1 - use the IO-APIC timer for the NMI watchdog
-                       2 - use the local APIC for the NMI watchdog using
-                       a performance counter. Note: This will use one
-                       performance counter and the local APIC's performance
-                       vector.
                        When panic is specified, panic when an NMI watchdog
                        timeout occurs.
                        This is useful when you use a panic=... timeout and
                        need the box quickly up again.
-                       Instead of 1 and 2 it is possible to use the following
-                       symbolic names: lapic and ioapic
-                       Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
 
        netpoll.carrier_timeout=
                        [NET] Specifies amount of time (in seconds) that
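(Illustrative aside, not part of the patch: with the IO-APIC and local-APIC modes removed, the perf-based hard-lockup detector is what remains, so a boot line would plausibly look like one of the following.)

	nmi_watchdog=0          # turn the NMI watchdog off
	nmi_watchdog=panic      # leave it enabled, panic on a hard-lockup timeout
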
index 71e40f9118df5d9d1f002a84f48e8600ac533176..92e5b67105f05d35b24b7e691d1b7de4e03e6595 100644 (file)
@@ -4612,7 +4612,7 @@ PERFORMANCE EVENTS SUBSYSTEM
 M:     Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:     Paul Mackerras <paulus@samba.org>
 M:     Ingo Molnar <mingo@elte.hu>
-M:     Arnaldo Carvalho de Melo <acme@redhat.com>
+M:     Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 S:     Supported
 F:     kernel/perf_event*.c
 F:     include/linux/perf_event.h
index fe792ca818f64c4d9954e8b62e5f8064a5d5777e..5996e7a6757e4e058d4d7e7f9d10f76fbf6b6d20 100644 (file)
@@ -1,10 +1,4 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-#ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
-#else
-static inline void init_hw_perf_events(void)    { }
-#endif
-
 #endif /* __ASM_ALPHA_PERF_EVENT_H */
index 5f77afb88e898b849f57e5c115f4688282e71c62..4c8bb374eb0a288d03d2cf5ad124e914638527f2 100644 (file)
@@ -112,8 +112,6 @@ init_IRQ(void)
        wrent(entInt, 0);
 
        alpha_mv.init_irq();
-
-       init_hw_perf_events();
 }
 
 /*
index 1cc49683fb69b2a5f96639e71a2f1af821479e77..90561c45e7d8928e8e137e33164d2d2d661a28b8 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/hwrpb.h>
 #include <asm/atomic.h>
@@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 /*
  * Init call to initialise performance events at kernel startup.
  */
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        pr_info("Performance events: ");
 
        if (!supported_cpu()) {
                pr_cont("No support for your CPU.\n");
-               return;
+               return 0;
        }
 
        pr_cont("Supported CPU type!\n");
@@ -881,6 +882,8 @@ void __init init_hw_perf_events(void)
        /* And set up PMU specification */
        alpha_pmu = &ev67_pmu;
 
-       perf_pmu_register(&pmu);
-}
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
+       return 0;
+}
+early_initcall(init_hw_perf_events);
index 07a50357492ac6858bc21d0b7913aefc93cbb1cf..fdfa4976b0bfeca637178609bb5e8d51e63ac672 100644 (file)
@@ -3034,11 +3034,11 @@ init_hw_perf_events(void)
                pr_info("no hardware support available\n");
        }
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
        return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 /*
  * Callchain handling code.
index 5c7c6fc07565bd468a9f78b327073f0212795a42..183e0d226669193700c72f405f1e4e43303612a4 100644 (file)
@@ -1047,6 +1047,6 @@ init_hw_perf_events(void)
 
        return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 #endif /* defined(CONFIG_CPU_MIPS32)... */
index 7c07de0d89436ea4bac7350de09ab73d5f89b2e4..b150b510510f167d2782f645999303dfa297e2ad 100644 (file)
@@ -126,4 +126,4 @@ static int init_e500_pmu(void)
        return register_fsl_emb_pmu(&e500_pmu);
 }
 
-arch_initcall(init_e500_pmu);
+early_initcall(init_e500_pmu);
index 09d72028f317755428865f5105ee6b76b829ad7a..2cc5e0301d0b532a2291e400cb7bdc0a87aa89cd 100644 (file)
@@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void)
        return register_power_pmu(&mpc7450_pmu);
 }
 
-arch_initcall(init_mpc7450_pmu);
+early_initcall(init_mpc7450_pmu);
index 3129c855933c2a3857b0c4b3321b259b851279b8..5674807057899cd09dc366d128b1769afc6cd3e9 100644 (file)
@@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
                freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
-       perf_pmu_register(&power_pmu);
+       perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(power_pmu_notifier);
 
        return 0;
index 7ecca59ddf77fe20bd46b470d9392cdd16fd5ba9..4dcf5f831e9d01f8694443ac7d56146b6f7a9777 100644 (file)
@@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
        pr_info("%s performance monitor hardware support registered\n",
                pmu->name);
 
-       perf_pmu_register(&fsl_emb_pmu);
+       perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
 
        return 0;
 }
index 2a361cdda635881ba4e96bf33f0a3dd6b8c273ea..ead8b3c2649ebba98c00423727e7dae54c6f5a7f 100644 (file)
@@ -613,4 +613,4 @@ static int init_power4_pmu(void)
        return register_power_pmu(&power4_pmu);
 }
 
-arch_initcall(init_power4_pmu);
+early_initcall(init_power4_pmu);
index 199de527d411446918651bd374bc5e9586ace46f..eca0ac595cb6c5b790ea4fd37d77ebb3a8ee474b 100644 (file)
@@ -682,4 +682,4 @@ static int init_power5p_pmu(void)
        return register_power_pmu(&power5p_pmu);
 }
 
-arch_initcall(init_power5p_pmu);
+early_initcall(init_power5p_pmu);
index 98b6a729a9dd127cc2c88e799b58bdbea2fa313c..d5ff0f64a5e645e01ddc6f9b56c91c60e14b204a 100644 (file)
@@ -621,4 +621,4 @@ static int init_power5_pmu(void)
        return register_power_pmu(&power5_pmu);
 }
 
-arch_initcall(init_power5_pmu);
+early_initcall(init_power5_pmu);
index 84a607bda8fbc129562943d7d2ce7fe0ee0f11bd..31603927e376e7e8854bf6a0faf86bc2bc9e56f7 100644 (file)
@@ -544,4 +544,4 @@ static int init_power6_pmu(void)
        return register_power_pmu(&power6_pmu);
 }
 
-arch_initcall(init_power6_pmu);
+early_initcall(init_power6_pmu);
index 852f7b7f6b4045801df807b997c7b110426ce7a6..593740fcb799d6fc9c29faca49425ad97b15b19a 100644 (file)
@@ -369,4 +369,4 @@ static int init_power7_pmu(void)
        return register_power_pmu(&power7_pmu);
 }
 
-arch_initcall(init_power7_pmu);
+early_initcall(init_power7_pmu);
index 3fee685de4df49e01a3a85ff069f3d409354c924..9a6e093858fe13fd30d2a79adb82e38e8c424b84 100644 (file)
@@ -494,4 +494,4 @@ static int init_ppc970_pmu(void)
        return register_power_pmu(&ppc970_pmu);
 }
 
-arch_initcall(init_ppc970_pmu);
+early_initcall(init_ppc970_pmu);
index dbf3b4bb71febb0ba38e9ec2f6c27731ea0970be..748955df018d801db05137f1831cf18f01938b55 100644 (file)
@@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void)
 
        return register_sh_pmu(&sh7750_pmu);
 }
-arch_initcall(sh7750_pmu_init);
+early_initcall(sh7750_pmu_init);
index 580276525731531643c9165d5c45ce28f5eade20..17e6bebfede067c26379efde6b8e0a69fc6565e9 100644 (file)
@@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void)
 
        return register_sh_pmu(&sh4a_pmu);
 }
-arch_initcall(sh4a_pmu_init);
+early_initcall(sh4a_pmu_init);
index 5a4b33435650c8ea108668d8e7e30786a20bd335..2ee21a47b5af6e1aac2889712c69848b7194f317 100644 (file)
@@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
 
        WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(sh_pmu_notifier);
        return 0;
 }
index 6e8bfa1786dab1f45d3dff5a1dcacc31a08d4844..4d3dbe3703e9001f53f65f64d38629bda6dedb63 100644 (file)
@@ -4,8 +4,6 @@
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 
-extern void init_hw_perf_events(void);
-
 #define perf_arch_fetch_caller_regs(regs, ip)          \
 do {                                                   \
        unsigned long _pstate, _asi, _pil, _i7, _fp;    \
@@ -26,8 +24,6 @@ do {                                                  \
        (regs)->u_regs[UREG_I6] = _fp;                  \
        (regs)->u_regs[UREG_I7] = _i7;                  \
 } while (0)
-#else
-static inline void init_hw_perf_events(void)   { }
 #endif
 
 #endif
index a4bd7ba74c89d9f25221f29e616f49f26517aad5..300f810142f57e82cce5984a56d9e5e920aebb7d 100644 (file)
@@ -270,8 +270,6 @@ int __init nmi_init(void)
                        atomic_set(&nmi_active, -1);
                }
        }
-       if (!err)
-               init_hw_perf_events();
 
        return err;
 }
index 0d6deb55a2ae7e4189b5ab60aec81cd8df28adb6..760578687e7ca86cb0bb63cc7dc68721d51bee90 100644 (file)
@@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void)
        return false;
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        pr_info("Performance events: ");
 
        if (!supported_pmu()) {
                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
-               return;
+               return 0;
        }
 
        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        register_die_notifier(&perf_event_nmi_notifier);
+
+       return 0;
 }
+early_initcall(init_hw_perf_events);
 
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
                           struct pt_regs *regs)
index 76561d20ea2f27f0edfd0eee6d043b98c6aa6e90..4a2adaa9aefcc1fe2e42f7c01e29cb3ec61f5842 100644 (file)
@@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
+struct text_poke_param {
+       void *addr;
+       const void *opcode;
+       size_t len;
+};
+
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 #define IDEAL_NOP_SIZE_5 5
index 13b0ebaa512f77764e06956632b32755f8ca2bfa..ba870bb6dd8ef30ab81a317a8eb43dcb83066630 100644 (file)
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq)
        return ((irq == 2) ? 9 : irq);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-# define ARCH_HAS_NMI_WATCHDOG
-#endif
-
 #ifdef CONFIG_X86_32
 extern void irq_ctx_init(int cpu);
 #else
index 5bdfca86581beb3b45c60fd1f8d900a5daa68bf9..f23eb2528464f4a51ad6d14d70db0f7c33d92e0a 100644 (file)
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-                      unsigned long *sp, unsigned long bp);
+                      unsigned long *sp);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
index 6b89f5e860214266d7270160f739e9a9be290802..86030f63ba02cf0947920a2bdd35a1f0d13ea833 100644 (file)
 #define MSR_AMD64_IBSCTL               0xc001103a
 #define MSR_AMD64_IBSBRTARGET          0xc001103b
 
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL              0xc0010200
+#define MSR_F15H_PERF_CTR              0xc0010201
+
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE      0xc0010058
 #define FAM10H_MMIO_CONF_ENABLE                (1<<0)
index 932f0f86b4b76252e6e6434ab9c15d81c3b17004..c4021b9535102547712c92203e8afe5809e567fc 100644 (file)
@@ -5,41 +5,15 @@
 #include <asm/irq.h>
 #include <asm/io.h>
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
-/**
- * do_nmi_callback
- *
- * Check to see if a callback exists and execute it.  Return 1
- * if the handler exists and was handled successfully.
- */
-int do_nmi_callback(struct pt_regs *regs, int cpu);
+#ifdef CONFIG_X86_LOCAL_APIC
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_LOCKUP_DETECTOR)
-extern int nmi_watchdog_enabled;
-#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
 extern void release_evntsel_nmi(unsigned int);
 
-extern void setup_apic_nmi_watchdog(void *);
-extern void stop_apic_nmi_watchdog(void *);
-extern void disable_timer_nmi_watchdog(void);
-extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
-extern void cpu_nmi_set_wd_enabled(void);
-
-extern atomic_t nmi_active;
-extern unsigned int nmi_watchdog;
-#define NMI_NONE       0
-#define NMI_IO_APIC    1
-#define NMI_LOCAL_APIC 2
-#define NMI_INVALID    3
-
 struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
                        void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic;
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-
-static inline void localise_nmi_watchdog(void)
-{
-       if (nmi_watchdog == NMI_IO_APIC)
-               nmi_watchdog = NMI_LOCAL_APIC;
-}
-
-/* check if nmi_watchdog is active (ie was specified at boot) */
-static inline int nmi_watchdog_active(void)
-{
-       /*
-        * actually it should be:
-        *      return (nmi_watchdog == NMI_LOCAL_APIC ||
-        *              nmi_watchdog == NMI_IO_APIC)
-        * but since they are power of two we could use a
-        * cheaper way --cvg
-        */
-       return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
-}
 #endif
 
-void lapic_watchdog_stop(void);
-int lapic_watchdog_init(unsigned nmi_hz);
-int lapic_wd_event(unsigned nmi_hz);
-unsigned lapic_adjust_nmi_hz(unsigned hz);
-void disable_lapic_nmi_watchdog(void);
-void enable_lapic_nmi_watchdog(void);
 void stop_nmi(void);
 void restart_nmi(void);
 
index 550e26b1dbb3593f324910f0197402966ae91299..d9d4dae305f6991efa446ec0ef4e7fad8054bdc3 100644 (file)
@@ -125,7 +125,6 @@ union cpuid10_edx {
 #define IBS_OP_MAX_CNT_EXT     0x007FFFFFULL   /* not a register bit mask */
 
 #ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
 extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET                        0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 }
 
 #else
-static inline void init_hw_perf_events(void)           { }
 static inline void perf_events_lapic_init(void)        { }
 #endif
 
index a70cd216be5d729db1f364340f911d632819f18d..295e2ff18a6a80be6ec3425d91e73239c76f1198 100644 (file)
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS {
 };
 
 /*
- * P4 PEBS specifics (Replay Event only)
- *
- * Format (bits):
- *   0-6: metric from P4_PEBS_METRIC enum
- *    7 : reserved
- *    8 : reserved
- * 9-11 : reserved
- *
  * Note we have UOP and PEBS bits reserved for now
  * just in case if we will need them once
  */
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC {
        P4_PEBS_METRIC__max
 };
 
+/*
+ * Notes on internal configuration of ESCR+CCCR tuples
+ *
+ * Since P4 has quite the different architecture of
+ * performance registers in compare with "architectural"
+ * once and we have on 64 bits to keep configuration
+ * of performance event, the following trick is used.
+ *
+ * 1) Since both ESCR and CCCR registers have only low
+ *    32 bits valuable, we pack them into a single 64 bit
+ *    configuration. Low 32 bits of such config correspond
+ *    to low 32 bits of CCCR register and high 32 bits
+ *    correspond to low 32 bits of ESCR register.
+ *
+ * 2) The meaning of every bit of such config field can
+ *    be found in Intel SDM but it should be noted that
+ *    we "borrow" some reserved bits for own usage and
+ *    clean them or set to a proper value when we do
+ *    a real write to hardware registers.
+ *
+ * 3) The format of bits of config is the following
+ *    and should be either 0 or set to some predefined
+ *    values:
+ *
+ *    Low 32 bits
+ *    -----------
+ *      0-6: P4_PEBS_METRIC enum
+ *     7-11:                    reserved
+ *       12:                    reserved (Enable)
+ *    13-15:                    reserved (ESCR select)
+ *    16-17: Active Thread
+ *       18: Compare
+ *       19: Complement
+ *    20-23: Threshold
+ *       24: Edge
+ *       25:                    reserved (FORCE_OVF)
+ *       26:                    reserved (OVF_PMI_T0)
+ *       27:                    reserved (OVF_PMI_T1)
+ *    28-29:                    reserved
+ *       30:                    reserved (Cascade)
+ *       31:                    reserved (OVF)
+ *
+ *    High 32 bits
+ *    ------------
+ *        0:                    reserved (T1_USR)
+ *        1:                    reserved (T1_OS)
+ *        2:                    reserved (T0_USR)
+ *        3:                    reserved (T0_OS)
+ *        4: Tag Enable
+ *      5-8: Tag Value
+ *     9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
+ *    25-30: enum P4_EVENTS
+ *       31:                    reserved (HT thread)
+ */
+
 #endif /* PERF_EVENT_P4_H */
 
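As an aside to the configuration comment added above, here is a minimal sketch of the packing it describes (CCCR image in the low 32 bits, ESCR image in the high 32 bits); the helper names are hypothetical and are not identifiers from this patch:

#include <stdint.h>

/* Sketch only: pack the low 32 bits of a CCCR image into bits 0-31 and
 * the low 32 bits of an ESCR image into bits 32-63 of one config word. */
static inline uint64_t p4_pack_config(uint32_t cccr, uint32_t escr)
{
	return ((uint64_t)escr << 32) | cccr;
}

/* And recover the two register images from a packed config. */
static inline uint32_t p4_config_cccr(uint64_t config)
{
	return (uint32_t)(config & 0xffffffffULL);
}

static inline uint32_t p4_config_escr(uint64_t config)
{
	return (uint32_t)(config >> 32);
}
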
index 1def60114906bf1b1dfa98ef3bd8ac177f65010f..6c22bf353f26495b1fa71dc5a92cdaa05e5b1d8e 100644 (file)
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
                setup_IO_APIC();
        else {
                nr_ioapics = 0;
-               localise_nmi_watchdog();
        }
 #endif
 }
index 2b16a2ad23dc6b9647028c0808f8f45b094e74ac..52b5c7ed3608d9fc439c5ca69bf58ca1b8ebef88 100644 (file)
@@ -7,6 +7,7 @@
 #define _ASM_X86_STACKTRACE_H
 
 #include <linux/uaccess.h>
+#include <linux/ptrace.h>
 
 extern int kstack_depth_to_print;
 
@@ -46,7 +47,7 @@ struct stacktrace_ops {
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+               unsigned long *stack,
                const struct stacktrace_ops *ops, void *data);
 
 #ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+       unsigned long bp;
+
+       if (regs)
+               return regs->bp;
+
+       if (task == current) {
+               /* Grab bp right from our regs */
+               get_bp(bp);
+               return bp;
+       }
+
+       /* bp is the last reg pushed by switch_to */
+       return *(unsigned long *)task->thread.sp;
+}
+#else
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+       return 0;
+}
+#endif
+
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp, char *log_lvl);
+                  unsigned long *stack, char *log_lvl);
 
 extern void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *sp, unsigned long bp, char *log_lvl);
+                  unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
index 5469630b27f56d732b10036ba381df39f28ca52b..fa7b9176b76cb33820034403fd8f4a50dc49709c 100644 (file)
 unsigned long long native_sched_clock(void);
 extern int recalibrate_cpu_khz(void);
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-extern int timer_ack;
-#else
-# define timer_ack (0)
-#endif
-
 extern int no_timer_check;
 
 /* Accelerators for sched_clock()
index 5079f24c955a2d3b9b66532cd2c8cd45e5116d14..553d0b0d639bf4b8ef1eca720e7fe43f0fa1e662 100644 (file)
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first;
 static int wrote_text;
 
 struct text_poke_params {
-       void *addr;
-       const void *opcode;
-       size_t len;
+       struct text_poke_param *params;
+       int nparams;
 };
 
 static int __kprobes stop_machine_text_poke(void *data)
 {
        struct text_poke_params *tpp = data;
+       struct text_poke_param *p;
+       int i;
 
        if (atomic_dec_and_test(&stop_machine_first)) {
-               text_poke(tpp->addr, tpp->opcode, tpp->len);
+               for (i = 0; i < tpp->nparams; i++) {
+                       p = &tpp->params[i];
+                       text_poke(p->addr, p->opcode, p->len);
+               }
                smp_wmb();      /* Make sure other cpus see that this has run */
                wrote_text = 1;
        } else {
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data)
                smp_mb();       /* Load wrote_text before following execution */
        }
 
-       flush_icache_range((unsigned long)tpp->addr,
-                          (unsigned long)tpp->addr + tpp->len);
+       for (i = 0; i < tpp->nparams; i++) {
+               p = &tpp->params[i];
+               flush_icache_range((unsigned long)p->addr,
+                                  (unsigned long)p->addr + p->len);
+       }
+
        return 0;
 }
 
@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
 void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 {
        struct text_poke_params tpp;
+       struct text_poke_param p;
 
-       tpp.addr = addr;
-       tpp.opcode = opcode;
-       tpp.len = len;
+       p.addr = addr;
+       p.opcode = opcode;
+       p.len = len;
+       tpp.params = &p;
+       tpp.nparams = 1;
        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
        /* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
        return addr;
 }
 
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. Since the
+ * stop_machine() is heavy task, it is better to aggregate text_poke requests
+ * and do it once if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+       struct text_poke_params tpp = {.params = params, .nparams = n};
+
+       atomic_set(&stop_machine_first, 1);
+       wrote_text = 0;
+       stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
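For illustration only (no call site appears in this hunk), a caller batching two patches through the new text_poke_smp_batch() interface might look roughly like the sketch below; addr1/opcode1/len1 and friends are placeholders, and per the kerneldoc above the caller must hold text_mutex and get_online_cpus():

	struct text_poke_param params[2];

	get_online_cpus();
	mutex_lock(&text_mutex);

	params[0].addr   = addr1;      /* placeholder: first patch site */
	params[0].opcode = opcode1;
	params[0].len    = len1;

	params[1].addr   = addr2;      /* placeholder: second patch site */
	params[1].opcode = opcode2;
	params[1].len    = len2;

	/* one stop_machine() round for both patches instead of two */
	text_poke_smp_batch(params, ARRAY_SIZE(params));

	mutex_unlock(&text_mutex);
	put_online_cpus();
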
index 910f20b457c464d34f1e9874269d652ea0da325e..3966b564ea478746bc77d66886249a294177b21a 100644 (file)
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)   += nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)      += hw_nmi.o
+obj-y                          += hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
 obj-$(CONFIG_SMP)              += ipi.o
index 78218135b48e6169d155fb4a097e5b6c8e30e53a..fb7657822aadd7cb0954f6483490b5e9a0279f60 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void)
         * PIT/HPET going.  Otherwise register lapic as a dummy
         * device.
         */
-       if (nmi_watchdog != NMI_IO_APIC)
-               lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-       else
-               pr_warning("APIC timer registered as dummy,"
-                       " due to nmi_watchdog=%d!\n", nmi_watchdog);
+       lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
        /* Setup the lapic or request the broadcast */
        setup_APIC_timer();
@@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void)
        }
 #endif
 
-       setup_apic_nmi_watchdog(NULL);
        apic_pm_activate();
 
        /*
@@ -1758,17 +1752,10 @@ int __init APIC_init_uniprocessor(void)
                setup_IO_APIC();
        else {
                nr_ioapics = 0;
-               localise_nmi_watchdog();
        }
-#else
-       localise_nmi_watchdog();
 #endif
 
        x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-       check_nmi_watchdog();
-#endif
-
        return 0;
 }
 
index 62f6e1e55b90d7f9a2bc460e73ba8895da23c4ff..c57d0b5994489df472d38566fb6a65da45894c7c 100644 (file)
 #include <linux/nmi.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
        return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
+#ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
@@ -91,18 +92,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
 }
 early_initcall(register_trigger_all_cpu_backtrace);
 #endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
index fadcd743a74f8bdcd5effbaf7e28b01ea3003532..16c2db8750a24d84e339d47b1b9c2341cd144b8c 100644 (file)
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -2642,24 +2641,6 @@ static void lapic_register_intr(int irq)
                                      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-       /*
-        * Dirty trick to enable the NMI watchdog ...
-        * We put the 8259A master into AEOI mode and
-        * unmask on all local APICs LVT0 as NMI.
-        *
-        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-        * is from Maciej W. Rozycki - so we do not have to EOI from
-        * the NMI handler or the timer interrupt.
-        */
-       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-       enable_NMI_through_LVT0();
-
-       apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
@@ -2765,15 +2746,6 @@ static inline void __init check_timer(void)
         */
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-       {
-               unsigned int ver;
-
-               ver = apic_read(APIC_LVR);
-               ver = GET_APIC_VERSION(ver);
-               timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-       }
-#endif
 
        pin1  = find_isa_irq_pin(0, mp_INT);
        apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2793,6 @@ static inline void __init check_timer(void)
                                unmask_ioapic(cfg);
                }
                if (timer_irq_works()) {
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               setup_nmi();
-                               legacy_pic->unmask(0);
-                       }
                        if (disable_timer_pin_1 > 0)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
@@ -2850,11 +2818,6 @@ static inline void __init check_timer(void)
                if (timer_irq_works()) {
                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
                        timer_through_8259 = 1;
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               legacy_pic->mask(0);
-                               setup_nmi();
-                               legacy_pic->unmask(0);
-                       }
                        goto out;
                }
                /*
@@ -2866,15 +2829,6 @@ static inline void __init check_timer(void)
                apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
        }
 
-       if (nmi_watchdog == NMI_IO_APIC) {
-               apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-                           "through the IO-APIC - disabling NMI Watchdog!\n");
-               nmi_watchdog = NMI_NONE;
-       }
-#ifdef CONFIG_X86_32
-       timer_ack = 0;
-#endif
-
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as Virtual Wire IRQ...\n");
 
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644 (file)
index c90041c..0000000
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- *  NMI watchdog support on APIC systems
- *
- *  Started by Ingo Molnar <mingo@redhat.com>
- *
- *  Fixes:
- *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
- *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
- *  Mikael Pettersson  : Pentium 4 support for local APIC NMI watchdog.
- *  Pavel Machek and
- *  Mikael Pettersson  : PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- *     be enabled
- *  0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0);          /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
-       return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_MCE)
-       return atomic_read(&mce_entry) > 0;
-#endif
-       return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active, when we have highres/dyntick on
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
-       return per_cpu(irq_stat, cpu).apic_timer_irqs +
-               per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
-       local_irq_enable_in_hardirq();
-       /*
-        * Intentionally don't use cpu_relax here. This is
-        * to make sure that the performance counter really ticks,
-        * even if there is a simulator or similar that catches the
-        * pause instruction. On a real HT machine this is fine because
-        * all other CPUs are busy with "useless" delay loops and don't
-        * care if they get somewhat less cycles.
-        */
-       while (endflag == 0)
-               mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
-{
-       printk(KERN_CONT "\n");
-
-       printk(KERN_WARNING
-               "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
-                       cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
-       printk(KERN_WARNING
-               "Please report this to bugzilla.kernel.org,\n");
-       printk(KERN_WARNING
-               "and attach the output of the 'dmesg' command.\n");
-
-       per_cpu(wd_enabled, cpu) = 0;
-       atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
-       apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
-       unsigned int *prev_nmi_count;
-       int cpu;
-
-       if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
-               return 0;
-
-       prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
-       if (!prev_nmi_count)
-               goto error;
-
-       printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
-#endif
-
-       for_each_possible_cpu(cpu)
-               prev_nmi_count[cpu] = get_nmi_count(cpu);
-       local_irq_enable();
-       mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
-
-       for_each_online_cpu(cpu) {
-               if (!per_cpu(wd_enabled, cpu))
-                       continue;
-               if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
-                       report_broken_nmi(cpu, prev_nmi_count);
-       }
-       endflag = 1;
-       if (!atomic_read(&nmi_active)) {
-               kfree(prev_nmi_count);
-               atomic_set(&nmi_active, -1);
-               goto error;
-       }
-       printk("OK.\n");
-
-       /*
-        * now that we know it works we can reduce NMI frequency to
-        * something more reasonable; makes a difference in some configs
-        */
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               nmi_hz = lapic_adjust_nmi_hz(1);
-
-       kfree(prev_nmi_count);
-       return 0;
-error:
-       if (nmi_watchdog == NMI_IO_APIC) {
-               if (!timer_through_8259)
-                       legacy_pic->mask(0);
-               on_each_cpu(__acpi_nmi_disable, NULL, 1);
-       }
-
-#ifdef CONFIG_X86_32
-       timer_ack = 0;
-#endif
-       return -1;
-}
-
-static int __init setup_nmi_watchdog(char *str)
-{
-       unsigned int nmi;
-
-       if (!strncmp(str, "panic", 5)) {
-               panic_on_timeout = 1;
-               str = strchr(str, ',');
-               if (!str)
-                       return 1;
-               ++str;
-       }
-
-       if (!strncmp(str, "lapic", 5))
-               nmi_watchdog = NMI_LOCAL_APIC;
-       else if (!strncmp(str, "ioapic", 6))
-               nmi_watchdog = NMI_IO_APIC;
-       else {
-               get_option(&str, &nmi);
-               if (nmi >= NMI_INVALID)
-                       return 0;
-               nmi_watchdog = nmi;
-       }
-
-       return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-/*
- * Suspend/resume support
- */
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
-       /* only CPU0 goes here, other CPUs should be offline */
-       nmi_pm_active = atomic_read(&nmi_active);
-       stop_apic_nmi_watchdog(NULL);
-       BUG_ON(atomic_read(&nmi_active) != 0);
-       return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
-       /* only CPU0 goes here, other CPUs should be offline */
-       if (nmi_pm_active > 0) {
-               setup_apic_nmi_watchdog(NULL);
-               touch_nmi_watchdog();
-       }
-       return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
-       .name           = "lapic_nmi",
-       .resume         = lapic_nmi_resume,
-       .suspend        = lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
-       .id     = 0,
-       .cls    = &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
-       int error;
-
-       /*
-        * should really be a BUG_ON but b/c this is an
-        * init call, it just doesn't work.  -dcz
-        */
-       if (nmi_watchdog != NMI_LOCAL_APIC)
-               return 0;
-
-       if (atomic_read(&nmi_active) < 0)
-               return 0;
-
-       error = sysdev_class_register(&nmi_sysclass);
-       if (!error)
-               error = sysdev_register(&device_lapic_nmi);
-       return error;
-}
-
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif /* CONFIG_PM */
-
-static void __acpi_nmi_enable(void *__unused)
-{
-       apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
-       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_enable, NULL, 1);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
-       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_disable, NULL, 1);
-}
-
-/*
- * This function is called as soon the LAPIC NMI watchdog driver has everything
- * in place and it's ready to check if the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
-       __get_cpu_var(wd_enabled) = 1;
-}
-
-void setup_apic_nmi_watchdog(void *unused)
-{
-       if (__get_cpu_var(wd_enabled))
-               return;
-
-       /* cheap hack to support suspend/resume */
-       /* if cpu0 is not active neither should the other cpus */
-       if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
-               return;
-
-       switch (nmi_watchdog) {
-       case NMI_LOCAL_APIC:
-               if (lapic_watchdog_init(nmi_hz) < 0) {
-                       __get_cpu_var(wd_enabled) = 0;
-                       return;
-               }
-               /* FALL THROUGH */
-       case NMI_IO_APIC:
-               __get_cpu_var(wd_enabled) = 1;
-               atomic_inc(&nmi_active);
-       }
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
-       /* only support LOCAL and IO APICs for now */
-       if (!nmi_watchdog_active())
-               return;
-       if (__get_cpu_var(wd_enabled) == 0)
-               return;
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               lapic_watchdog_stop();
-       else
-               __acpi_nmi_disable(NULL);
-       __get_cpu_var(wd_enabled) = 0;
-       atomic_dec(&nmi_active);
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up here too!]
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(long, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
-       if (nmi_watchdog_active()) {
-               unsigned cpu;
-
-               /*
-                * Tell other CPUs to reset their alert counters. We cannot
-                * do it ourselves because the alert count increase is not
-                * atomic.
-                */
-               for_each_present_cpu(cpu) {
-                       if (per_cpu(nmi_touch, cpu) != 1)
-                               per_cpu(nmi_touch, cpu) = 1;
-               }
-       }
-
-       /*
-        * Tickle the softlockup detector too:
-        */
-       touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
-{
-       /*
-        * Since current_thread_info()-> is always on the stack, and we
-        * always switch the stack NMI-atomically, it's safe to use
-        * smp_processor_id().
-        */
-       unsigned int sum;
-       int touched = 0;
-       int cpu = smp_processor_id();
-       int rc = 0;
-
-       sum = get_timer_irqs(cpu);
-
-       if (__get_cpu_var(nmi_touch)) {
-               __get_cpu_var(nmi_touch) = 0;
-               touched = 1;
-       }
-
-       /* We can be called before check_nmi_watchdog, hence NULL check. */
-       if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-               static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
-
-               raw_spin_lock(&lock);
-               printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
-               show_regs(regs);
-               dump_stack();
-               raw_spin_unlock(&lock);
-               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-
-               rc = 1;
-       }
-
-       /* Could check oops_in_progress here too, but it's safer not to */
-       if (mce_in_progress())
-               touched = 1;
-
-       /* if the none of the timers isn't firing, this cpu isn't doing much */
-       if (!touched && __get_cpu_var(last_irq_sum) == sum) {
-               /*
-                * Ayiee, looks like this CPU is stuck ...
-                * wait a few IRQs (5 seconds) before doing the oops ...
-                */
-               __this_cpu_inc(alert_counter);
-               if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
-                       /*
-                        * die_nmi will return ONLY if NOTIFY_STOP happens..
-                        */
-                       die_nmi("BUG: NMI Watchdog detected LOCKUP",
-                               regs, panic_on_timeout);
-       } else {
-               __get_cpu_var(last_irq_sum) = sum;
-               __this_cpu_write(alert_counter, 0);
-       }
-
-       /* see if the nmi watchdog went off */
-       if (!__get_cpu_var(wd_enabled))
-               return rc;
-       switch (nmi_watchdog) {
-       case NMI_LOCAL_APIC:
-               rc |= lapic_wd_event(nmi_hz);
-               break;
-       case NMI_IO_APIC:
-               /*
-                * don't know how to accurately check for this.
-                * just assume it was a watchdog timer interrupt
-                * This matches the old behaviour.
-                */
-               rc = 1;
-               break;
-       }
-       return rc;
-}
-
-#ifdef CONFIG_SYSCTL
-
-static void enable_ioapic_nmi_watchdog_single(void *unused)
-{
-       __get_cpu_var(wd_enabled) = 1;
-       atomic_inc(&nmi_active);
-       __acpi_nmi_enable(NULL);
-}
-
-static void enable_ioapic_nmi_watchdog(void)
-{
-       on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
-       touch_nmi_watchdog();
-}
-
-static void disable_ioapic_nmi_watchdog(void)
-{
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-}
-
-static int __init setup_unknown_nmi_panic(char *str)
-{
-       unknown_nmi_panic = 1;
-       return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-       unsigned char reason = get_nmi_reason();
-       char buf[64];
-
-       sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-       die_nmi(buf, regs, 1); /* Always panic here */
-       return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi
- */
-int proc_nmi_enabled(struct ctl_table *table, int write,
-                       void __user *buffer, size_t *length, loff_t *ppos)
-{
-       int old_state;
-
-       nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
-       old_state = nmi_watchdog_enabled;
-       proc_dointvec(table, write, buffer, length, ppos);
-       if (!!old_state == !!nmi_watchdog_enabled)
-               return 0;
-
-       if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
-               printk(KERN_WARNING
-                       "NMI watchdog is permanently disabled\n");
-               return -EIO;
-       }
-
-       if (nmi_watchdog == NMI_LOCAL_APIC) {
-               if (nmi_watchdog_enabled)
-                       enable_lapic_nmi_watchdog();
-               else
-                       disable_lapic_nmi_watchdog();
-       } else if (nmi_watchdog == NMI_IO_APIC) {
-               if (nmi_watchdog_enabled)
-                       enable_ioapic_nmi_watchdog();
-               else
-                       disable_ioapic_nmi_watchdog();
-       } else {
-               printk(KERN_WARNING
-                       "NMI watchdog doesn't know what hardware to touch\n");
-               return -EIO;
-       }
-       return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-       if (unknown_nmi_panic)
-               return unknown_nmi_panic_callback(regs, cpu);
-#endif
-       return 0;
-}
-
-void arch_trigger_all_cpu_backtrace(void)
-{
-       int i;
-
-       cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-
-       printk(KERN_INFO "sending NMI to all CPUs:\n");
-       apic->send_IPI_all(NMI_VECTOR);
-
-       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
-       for (i = 0; i < 10 * 1000; i++) {
-               if (cpumask_empty(to_cpumask(backtrace_mask)))
-                       break;
-               mdelay(1);
-       }
-}
index 4b68bda30938d0a55ed39eeaeff68157266a9ea0..1d59834396bdc145c630e671d1bccd7769689a88 100644 (file)
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
 #else
        vgetcpu_set_mode();
 #endif
-       init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
index 6d75b9145b13f0e68a106acd76b0d458c827d099..0a360d146596b6d01f8c833e655150faa3ae9605 100644 (file)
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
 {
        int i;
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               disable_lapic_nmi_watchdog();
-
        for (i = 0; i < x86_pmu.num_counters; i++) {
                if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
                        goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
        for (i--; i >= 0; i--)
                release_perfctr_nmi(x86_pmu.perfctr + i);
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               enable_lapic_nmi_watchdog();
-
        return false;
 }
 
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
                release_perfctr_nmi(x86_pmu.perfctr + i);
                release_evntsel_nmi(x86_pmu.eventsel + i);
        }
-
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               enable_lapic_nmi_watchdog();
 }
 
 #else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
 static bool check_hw_exists(void)
 {
        u64 val, val_new = 0;
-       int ret = 0;
+       int i, reg, ret = 0;
+
+       /*
+        * Check to see if the BIOS enabled any of the counters, if so
+        * complain and bail.
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               reg = x86_pmu.eventsel + i;
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
+                       goto bios_fail;
+       }
 
+       if (x86_pmu.num_counters_fixed) {
+               reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+                       if (val & (0x03 << i*4))
+                               goto bios_fail;
+               }
+       }
+
+       /*
+        * Now write a value and read it back to see if it matches,
+        * this is needed to detect certain hardware emulators (qemu/kvm)
+        * that don't trap on the MSR access and always return 0s.
+        */
        val = 0xabcdUL;
-       ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+       ret = checking_wrmsrl(x86_pmu.perfctr, val);
        ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
        if (ret || val != val_new)
-               return false;
+               goto msr_fail;
 
        return true;
+
+bios_fail:
+       printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+       printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+       return false;
+
+msr_fail:
+       printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+       return false;
 }
 
 static void reserve_ds_buffers(void);
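The rewritten check_hw_exists() now does two probes: refuse to take over counters the BIOS left enabled, and verify that PMU MSR writes actually stick. The second probe matters because hardware emulators such as qemu/kvm may silently drop the write and return zeros on the read. A minimal sketch of that read-back test, with msr_write()/msr_read() as hypothetical stand-ins for checking_wrmsrl()/rdmsrl_safe():

/*
 * Sketch only: non-zero when the counter MSR behaves like real hardware,
 * i.e. the written probe value survives a read-back.
 */
static int pmu_counter_responds(unsigned int counter_msr)
{
        unsigned long long probe = 0xabcdULL, readback = 0;

        if (msr_write(counter_msr, probe))              /* write faulted */
                return 0;
        if (msr_read(counter_msr, &readback))           /* read faulted */
                return 0;

        /* an emulator that swallowed the write hands back 0, not 0xabcd */
        return readback == probe;
}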
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
        struct hw_perf_event *hwc = &event->hw;
        u64 config;
 
-       if (!hwc->sample_period) {
+       if (!is_sampling_event(event)) {
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
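The hunk swaps the open-coded !hwc->sample_period test for is_sampling_event(event). That helper is assumed to be the trivial predicate sketched below (taken to live in include/linux/perf_event.h in this series):

/* sketch: an event samples iff a sample_period was requested */
static inline int is_sampling_event(struct perf_event *event)
{
        return event->attr.sample_period != 0;
}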
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void)
        pr_info("no hardware sampling interrupt available.\n");
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        struct event_constraint *c;
        int err;
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void)
                err = amd_pmu_init();
                break;
        default:
-               return;
+               return 0;
        }
        if (err != 0) {
                pr_cont("no PMU driver, software events only.\n");
-               return;
+               return 0;
        }
 
        pmu_check_apic();
 
        /* sanity check that the hardware exists or is emulated */
-       if (!check_hw_exists()) {
-               pr_cont("Broken PMU hardware detected, software events only.\n");
-               return;
-       }
+       if (!check_hw_exists())
+               return 0;
 
        pr_cont("%s PMU driver.\n", x86_pmu.name);
 
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void)
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(x86_pmu_notifier);
+
+       return 0;
 }
+early_initcall(init_hw_perf_events);
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
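With init_hw_perf_events() registered as an early_initcall() and returning int, the explicit call removed from identify_boot_cpu() earlier in this diff is no longer needed. For reference, the general shape of the pattern, with a hypothetical function name:

/* sketch: early initcalls must return int (0 on success) and run
 * early during boot, before the regular initcall levels */
static int __init my_pmu_setup(void)
{
        /* probe the hardware, then perf_pmu_register(...) */
        return 0;
}
early_initcall(my_pmu_setup);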
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
        perf_callchain_store(entry, regs->ip);
 
-       dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
+       dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
 }
 
 #ifdef CONFIG_COMPAT
index e421b8cd6944af860c4b28a1a176a14320ac9f79..67e2202a60393cd48a0f2251862c59f65bf7e667 100644 (file)
@@ -1,7 +1,5 @@
 #ifdef CONFIG_CPU_SUP_AMD
 
-static DEFINE_RAW_SPINLOCK(amd_nb_lock);
-
 static __initconst const u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
        return &emptyconstraint;
 }
 
-static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+static struct amd_nb *amd_alloc_nb(int cpu)
 {
        struct amd_nb *nb;
        int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
        if (!nb)
                return NULL;
 
-       nb->nb_id = nb_id;
+       nb->nb_id = -1;
 
        /*
         * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
        if (boot_cpu_data.x86_max_cores < 2)
                return NOTIFY_OK;
 
-       cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+       cpuc->amd_nb = amd_alloc_nb(cpu);
        if (!cpuc->amd_nb)
                return NOTIFY_BAD;
 
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
        nb_id = amd_get_nb_id(cpu);
        WARN_ON_ONCE(nb_id == BAD_APICID);
 
-       raw_spin_lock(&amd_nb_lock);
-
        for_each_online_cpu(i) {
                nb = per_cpu(cpu_hw_events, i).amd_nb;
                if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
 
        cpuc->amd_nb->nb_id = nb_id;
        cpuc->amd_nb->refcnt++;
-
-       raw_spin_unlock(&amd_nb_lock);
 }
 
 static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
 
        cpuhw = &per_cpu(cpu_hw_events, cpu);
 
-       raw_spin_lock(&amd_nb_lock);
-
        if (cpuhw->amd_nb) {
                struct amd_nb *nb = cpuhw->amd_nb;
 
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
 
                cpuhw->amd_nb = NULL;
        }
-
-       raw_spin_unlock(&amd_nb_lock);
 }
 
 static __initconst const struct x86_pmu amd_pmu = {
index c8f5c088cad11ae3f245e1e7374bb43c915170d6..24e390e40f2e0b484d4b2b09084deb9d120d59b4 100644 (file)
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
        if (ret)
                return ret;
 
+       if (event->attr.precise_ip &&
+           (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use INST_RETIRED.ANY_P
+                * (0x00c0), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * With a counter mask (CNTMASK), INST_RETIRED.ANY_P counts the
+                * number of cycles in which at least CNTMASK instructions
+                * retire. By setting CNTMASK to a value (16) larger than the
+                * maximum number of instructions that can retire per cycle (4)
+                * and then inverting the condition, we count every cycle in
+                * which fewer than 16 instructions retire, which is every
+                * cycle.
+                *
+                * Thereby we gain a PEBS capable cycle counter.
+                */
+               u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+
        if (event->attr.type != PERF_TYPE_RAW)
                return 0;
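The alternative encoding is easier to follow in numbers: 0x108000c0 packs event 0xc0 (INST_RETIRED.ANY_P), a counter mask of 16 and the invert bit into one PERFEVTSEL value. A standalone decoder, with the field layout per the architectural PERFEVTSEL format (nothing below is kernel code):

#include <stdio.h>

int main(void)
{
        unsigned long long cfg = 0x108000c0ULL;  /* INST_RETIRED.TOTAL_CYCLES */

        printf("event select: 0x%02llx\n", cfg & 0xff);         /* 0xc0 */
        printf("unit mask   : 0x%02llx\n", (cfg >> 8) & 0xff);  /* 0x00 */
        printf("invert      : %llu\n", (cfg >> 23) & 1);        /* count when below cmask */
        printf("counter mask: %llu\n", (cfg >> 24) & 0xff);     /* 16 > max 4 insns/cycle */
        return 0;
}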
 
index d9f4ff8fcd693c509b2d079b381a8e8683d10d9c..d5a236615501fd6a41fb6f6bc76bfd2369a47bc5 100644 (file)
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/smp.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <linux/kprobes.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
 
-struct nmi_watchdog_ctlblk {
-       unsigned int cccr_msr;
-       unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
-       unsigned int evntsel_msr;  /* the MSR to select the events to handle */
-};
-
-/* Interface defining a CPU specific perfctr watchdog */
-struct wd_ops {
-       int (*reserve)(void);
-       void (*unreserve)(void);
-       int (*setup)(unsigned nmi_hz);
-       void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
-       void (*stop)(void);
-       unsigned perfctr;
-       unsigned evntsel;
-       u64 checkbit;
-};
-
-static const struct wd_ops *wd_ops;
-
 /*
  * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
 static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
 static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
 
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-
 /* converts an msr to an appropriate reservation bit */
 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 {
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
        clear_bit(counter, evntsel_nmi_owner);
 }
 EXPORT_SYMBOL(release_evntsel_nmi);
-
-void disable_lapic_nmi_watchdog(void)
-{
-       BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-       if (atomic_read(&nmi_active) <= 0)
-               return;
-
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-
-       if (wd_ops)
-               wd_ops->unreserve();
-
-       BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_lapic_nmi_watchdog(void)
-{
-       BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-       /* are we already enabled */
-       if (atomic_read(&nmi_active) != 0)
-               return;
-
-       /* are we lapic aware */
-       if (!wd_ops)
-               return;
-       if (!wd_ops->reserve()) {
-               printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
-               return;
-       }
-
-       on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
-       touch_nmi_watchdog();
-}
-
-/*
- * Activate the NMI watchdog via the local APIC.
- */
-
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
-       u64 counter_val;
-       unsigned int retval = hz;
-
-       /*
-        * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
-        * are writable, with higher bits sign extending from bit 31.
-        * So, we can only program the counter with 31 bit values and
-        * 32nd bit should be 1, for 33.. to be 1.
-        * Find the appropriate nmi_hz
-        */
-       counter_val = (u64)cpu_khz * 1000;
-       do_div(counter_val, retval);
-       if (counter_val > 0x7fffffffULL) {
-               u64 count = (u64)cpu_khz * 1000;
-               do_div(count, 0x7fffffffUL);
-               retval = count + 1;
-       }
-       return retval;
-}
-
-static void write_watchdog_counter(unsigned int perfctr_msr,
-                               const char *descr, unsigned nmi_hz)
-{
-       u64 count = (u64)cpu_khz * 1000;
-
-       do_div(count, nmi_hz);
-       if (descr)
-               pr_debug("setting %s to -0x%08Lx\n", descr, count);
-       wrmsrl(perfctr_msr, 0 - count);
-}
-
-static void write_watchdog_counter32(unsigned int perfctr_msr,
-                               const char *descr, unsigned nmi_hz)
-{
-       u64 count = (u64)cpu_khz * 1000;
-
-       do_div(count, nmi_hz);
-       if (descr)
-               pr_debug("setting %s to -0x%08Lx\n", descr, count);
-       wrmsr(perfctr_msr, (u32)(-count), 0);
-}
-
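Taken together, adjust_for_32bit_ctr() and the write_watchdog_counter*() helpers above show how the old watchdog armed a counter: program it with the negated tick count so it overflows (and raises an NMI) after roughly 1/nmi_hz seconds, bumping nmi_hz whenever the count would not fit in the 31 programmable bits. A worked example of that arithmetic in plain user-space C, assuming a 3 GHz CPU:

#include <stdio.h>

int main(void)
{
        unsigned long long cpu_khz = 3000000;   /* assumed 3 GHz part */
        unsigned int nmi_hz = 1;
        unsigned long long ticks = cpu_khz * 1000 / nmi_hz;    /* 3e9: too big */

        if (ticks > 0x7fffffffULL) {
                /* only 31 bits are writable, so raise the NMI rate instead */
                nmi_hz = cpu_khz * 1000 / 0x7fffffffULL + 1;    /* becomes 2 */
                ticks = cpu_khz * 1000 / nmi_hz;
        }
        printf("nmi_hz=%u, counter armed with -%llu\n", nmi_hz, ticks);
        return 0;
}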
-/*
- * AMD K7/K8/Family10h/Family11h support.
- * AMD keeps this interface nicely stable so there is not much variety
- */
-#define K7_EVNTSEL_ENABLE      (1 << 22)
-#define K7_EVNTSEL_INT         (1 << 20)
-#define K7_EVNTSEL_OS          (1 << 17)
-#define K7_EVNTSEL_USR         (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING   0x76
-#define K7_NMI_EVENT           K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       wrmsrl(perfctr_msr, 0UL);
-
-       evntsel = K7_EVNTSEL_INT
-               | K7_EVNTSEL_OS
-               | K7_EVNTSEL_USR
-               | K7_NMI_EVENT;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
-
-       /* initialize the wd struct before enabling */
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= K7_EVNTSEL_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-
-       return 1;
-}
-
-static void single_msr_stop_watchdog(void)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int single_msr_reserve(void)
-{
-       if (!reserve_perfctr_nmi(wd_ops->perfctr))
-               return 0;
-
-       if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
-               release_perfctr_nmi(wd_ops->perfctr);
-               return 0;
-       }
-       return 1;
-}
-
-static void single_msr_unreserve(void)
-{
-       release_evntsel_nmi(wd_ops->evntsel);
-       release_perfctr_nmi(wd_ops->perfctr);
-}
-
-static void __kprobes
-single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       /* start the cycle over again */
-       write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops k7_wd_ops = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_k7_watchdog,
-       .rearm          = single_msr_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_K7_PERFCTR0,
-       .evntsel        = MSR_K7_EVNTSEL0,
-       .checkbit       = 1ULL << 47,
-};
-
-/*
- * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
- */
-#define P6_EVNTSEL0_ENABLE     (1 << 22)
-#define P6_EVNTSEL_INT         (1 << 20)
-#define P6_EVNTSEL_OS          (1 << 17)
-#define P6_EVNTSEL_USR         (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT           P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-static int setup_p6_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       /* KVM doesn't implement this MSR */
-       if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
-               return 0;
-
-       evntsel = P6_EVNTSEL_INT
-               | P6_EVNTSEL_OS
-               | P6_EVNTSEL_USR
-               | P6_NMI_EVENT;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-       write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
-
-       /* initialize the wd struct before enabling */
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= P6_EVNTSEL0_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-
-       return 1;
-}
-
-static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       /*
-        * P6 based Pentium M need to re-unmask
-        * the apic vector but it doesn't hurt
-        * other P6 variant.
-        * ArchPerfom/Core Duo also needs this
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-       /* P6/ARCH_PERFMON has 32 bit counter write */
-       write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p6_wd_ops = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_p6_watchdog,
-       .rearm          = p6_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_P6_PERFCTR0,
-       .evntsel        = MSR_P6_EVNTSEL0,
-       .checkbit       = 1ULL << 39,
-};
-
-/*
- * Intel P4 performance counters.
- * By far the most complicated of all.
- */
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL  (1 << 7)
-#define P4_ESCR_EVENT_SELECT(N)        ((N) << 25)
-#define P4_ESCR_OS             (1 << 3)
-#define P4_ESCR_USR            (1 << 2)
-#define P4_CCCR_OVF_PMI0       (1 << 26)
-#define P4_CCCR_OVF_PMI1       (1 << 27)
-#define P4_CCCR_THRESHOLD(N)   ((N) << 20)
-#define P4_CCCR_COMPLEMENT     (1 << 19)
-#define P4_CCCR_COMPARE                (1 << 18)
-#define P4_CCCR_REQUIRED       (3 << 16)
-#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
-#define P4_CCCR_ENABLE         (1 << 12)
-#define P4_CCCR_OVF            (1 << 31)
-
-#define P4_CONTROLS 18
-static unsigned int p4_controls[18] = {
-       MSR_P4_BPU_CCCR0,
-       MSR_P4_BPU_CCCR1,
-       MSR_P4_BPU_CCCR2,
-       MSR_P4_BPU_CCCR3,
-       MSR_P4_MS_CCCR0,
-       MSR_P4_MS_CCCR1,
-       MSR_P4_MS_CCCR2,
-       MSR_P4_MS_CCCR3,
-       MSR_P4_FLAME_CCCR0,
-       MSR_P4_FLAME_CCCR1,
-       MSR_P4_FLAME_CCCR2,
-       MSR_P4_FLAME_CCCR3,
-       MSR_P4_IQ_CCCR0,
-       MSR_P4_IQ_CCCR1,
-       MSR_P4_IQ_CCCR2,
-       MSR_P4_IQ_CCCR3,
-       MSR_P4_IQ_CCCR4,
-       MSR_P4_IQ_CCCR5,
-};
-/*
- * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- * CRU_ESCR0 (with any non-null event selector) through a complemented
- * max threshold. [IA32-Vol3, Section 14.9.9]
- */
-static int setup_p4_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr, cccr_msr;
-       unsigned int evntsel, cccr_val;
-       unsigned int misc_enable, dummy;
-       unsigned int ht_num;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
-       if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
-               return 0;
-
-#ifdef CONFIG_SMP
-       /* detect which hyperthread we are on */
-       if (smp_num_siblings == 2) {
-               unsigned int ebx, apicid;
-
-               ebx = cpuid_ebx(1);
-               apicid = (ebx >> 24) & 0xff;
-               ht_num = apicid & 1;
-       } else
-#endif
-               ht_num = 0;
-
-       /*
-        * performance counters are shared resources
-        * assign each hyperthread its own set
-        * (re-use the ESCR0 register, seems safe
-        * and keeps the cccr_val the same)
-        */
-       if (!ht_num) {
-               /* logical cpu 0 */
-               perfctr_msr = MSR_P4_IQ_PERFCTR0;
-               evntsel_msr = MSR_P4_CRU_ESCR0;
-               cccr_msr = MSR_P4_IQ_CCCR0;
-               cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
-
-               /*
-                * If we're on the kdump kernel or other situation, we may
-                * still have other performance counter registers set to
-                * interrupt and they'll keep interrupting forever because
-                * of the P4_CCCR_OVF quirk. So we need to ACK all the
-                * pending interrupts and disable all the registers here,
-                * before reenabling the NMI delivery. Refer to p4_rearm()
-                * about the P4_CCCR_OVF quirk.
-                */
-               if (reset_devices) {
-                       unsigned int low, high;
-                       int i;
-
-                       for (i = 0; i < P4_CONTROLS; i++) {
-                               rdmsr(p4_controls[i], low, high);
-                               low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
-                               wrmsr(p4_controls[i], low, high);
-                       }
-               }
-       } else {
-               /* logical cpu 1 */
-               perfctr_msr = MSR_P4_IQ_PERFCTR1;
-               evntsel_msr = MSR_P4_CRU_ESCR0;
-               cccr_msr = MSR_P4_IQ_CCCR1;
-
-               /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
-               if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
-                       cccr_val = P4_CCCR_OVF_PMI0;
-               else
-                       cccr_val = P4_CCCR_OVF_PMI1;
-               cccr_val |= P4_CCCR_ESCR_SELECT(4);
-       }
-
-       evntsel = P4_ESCR_EVENT_SELECT(0x3F)
-               | P4_ESCR_OS
-               | P4_ESCR_USR;
-
-       cccr_val |= P4_CCCR_THRESHOLD(15)
-                | P4_CCCR_COMPLEMENT
-                | P4_CCCR_COMPARE
-                | P4_CCCR_REQUIRED;
-
-       wrmsr(evntsel_msr, evntsel, 0);
-       wrmsr(cccr_msr, cccr_val, 0);
-       write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = cccr_msr;
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       cccr_val |= P4_CCCR_ENABLE;
-       wrmsr(cccr_msr, cccr_val, 0);
-       return 1;
-}
-
-static void stop_p4_watchdog(void)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       wrmsr(wd->cccr_msr, 0, 0);
-       wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int p4_reserve(void)
-{
-       if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
-               return 0;
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
-               goto fail1;
-#endif
-       if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
-               goto fail2;
-       /* RED-PEN why is ESCR1 not reserved here? */
-       return 1;
- fail2:
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1)
-               release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
- fail1:
-#endif
-       release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-       return 0;
-}
-
-static void p4_unreserve(void)
-{
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1)
-               release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
-#endif
-       release_evntsel_nmi(MSR_P4_CRU_ESCR0);
-       release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-}
-
-static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       unsigned dummy;
-       /*
-        * P4 quirks:
-        * - An overflown perfctr will assert its interrupt
-        *   until the OVF flag in its CCCR is cleared.
-        * - LVTPC is masked on interrupt and must be
-        *   unmasked by the LVTPC handler.
-        */
-       rdmsrl(wd->cccr_msr, dummy);
-       dummy &= ~P4_CCCR_OVF;
-       wrmsrl(wd->cccr_msr, dummy);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       /* start the cycle over again */
-       write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p4_wd_ops = {
-       .reserve        = p4_reserve,
-       .unreserve      = p4_unreserve,
-       .setup          = setup_p4_watchdog,
-       .rearm          = p4_rearm,
-       .stop           = stop_p4_watchdog,
-       /* RED-PEN this is wrong for the other sibling */
-       .perfctr        = MSR_P4_BPU_PERFCTR0,
-       .evntsel        = MSR_P4_BSU_ESCR0,
-       .checkbit       = 1ULL << 39,
-};
-
-/*
- * Watchdog using the Intel architected PerfMon.
- * Used for Core2 and hopefully all future Intel CPUs.
- */
-#define ARCH_PERFMON_NMI_EVENT_SEL     ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK   ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static struct wd_ops intel_arch_wd_ops;
-
-static int setup_intel_arch_watchdog(unsigned nmi_hz)
-{
-       unsigned int ebx;
-       union cpuid10_eax eax;
-       unsigned int unused;
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       /*
-        * Check whether the Architectural PerfMon supports
-        * Unhalted Core Cycles Event or not.
-        * NOTE: Corresponding bit = 0 in ebx indicates event present.
-        */
-       cpuid(10, &(eax.full), &ebx, &unused, &unused);
-       if ((eax.split.mask_length <
-                       (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
-           (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-               return 0;
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       wrmsrl(perfctr_msr, 0UL);
-
-       evntsel = ARCH_PERFMON_EVENTSEL_INT
-               | ARCH_PERFMON_EVENTSEL_OS
-               | ARCH_PERFMON_EVENTSEL_USR
-               | ARCH_PERFMON_NMI_EVENT_SEL
-               | ARCH_PERFMON_NMI_EVENT_UMASK;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-       write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-       intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
-       return 1;
-}
-
-static struct wd_ops intel_arch_wd_ops __read_mostly = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_intel_arch_watchdog,
-       .rearm          = p6_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_ARCH_PERFMON_PERFCTR1,
-       .evntsel        = MSR_ARCH_PERFMON_EVENTSEL1,
-};
-
-static void probe_nmi_watchdog(void)
-{
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_AMD:
-               if (boot_cpu_data.x86 == 6 ||
-                   (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
-                       wd_ops = &k7_wd_ops;
-               return;
-       case X86_VENDOR_INTEL:
-               /* Work around where perfctr1 doesn't have a working enable
-                * bit as described in the following errata:
-                * AE49 Core Duo and Intel Core Solo 65 nm
-                * AN49 Intel Pentium Dual-Core
-                * AF49 Dual-Core Intel Xeon Processor LV
-                */
-               if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
-                   ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
-                    boot_cpu_data.x86_mask == 4))) {
-                       intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
-                       intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
-               }
-               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-                       wd_ops = &intel_arch_wd_ops;
-                       break;
-               }
-               switch (boot_cpu_data.x86) {
-               case 6:
-                       if (boot_cpu_data.x86_model > 13)
-                               return;
-
-                       wd_ops = &p6_wd_ops;
-                       break;
-               case 15:
-                       wd_ops = &p4_wd_ops;
-                       break;
-               default:
-                       return;
-               }
-               break;
-       }
-}
-
-/* Interface to nmi.c */
-
-int lapic_watchdog_init(unsigned nmi_hz)
-{
-       if (!wd_ops) {
-               probe_nmi_watchdog();
-               if (!wd_ops) {
-                       printk(KERN_INFO "NMI watchdog: CPU not supported\n");
-                       return -1;
-               }
-
-               if (!wd_ops->reserve()) {
-                       printk(KERN_ERR
-                               "NMI watchdog: cannot reserve perfctrs\n");
-                       return -1;
-               }
-       }
-
-       if (!(wd_ops->setup(nmi_hz))) {
-               printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
-                      raw_smp_processor_id());
-               return -1;
-       }
-
-       return 0;
-}
-
-void lapic_watchdog_stop(void)
-{
-       if (wd_ops)
-               wd_ops->stop();
-}
-
-unsigned lapic_adjust_nmi_hz(unsigned hz)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
-           wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
-               hz = adjust_for_32bit_ctr(hz);
-       return hz;
-}
-
-int __kprobes lapic_wd_event(unsigned nmi_hz)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       u64 ctr;
-
-       rdmsrl(wd->perfctr_msr, ctr);
-       if (ctr & wd_ops->checkbit) /* perfctr still running? */
-               return 0;
-
-       wd_ops->rearm(wd, nmi_hz);
-       return 1;
-}
index 6e8752c1bd5241fc9e7e63ee088f06c84d0526fb..8474c998cbd40d2f3481f87879f5a42d1f105e52 100644 (file)
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
 
 void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp, char *log_lvl)
+               unsigned long *stack, char *log_lvl)
 {
        printk("%sCall Trace:\n", log_lvl);
-       dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+       dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp)
+               unsigned long *stack)
 {
-       show_trace_log_lvl(task, regs, stack, bp, "");
+       show_trace_log_lvl(task, regs, stack, "");
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-       show_stack_log_lvl(task, NULL, sp, 0, "");
+       show_stack_log_lvl(task, NULL, sp, "");
 }
 
 /*
@@ -210,7 +210,7 @@ void dump_stack(void)
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
-       show_trace(NULL, NULL, &stack, bp);
+       show_trace(NULL, NULL, &stack);
 }
 EXPORT_SYMBOL(dump_stack);
 
index 1bc7f75a5bdaf823999f7b90271808819b6f3ad3..74cc1eda384b8d26437a10fa55e3f64cfcf4545a 100644 (file)
 #include <asm/stacktrace.h>
 
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+               struct pt_regs *regs, unsigned long *stack,
                const struct stacktrace_ops *ops, void *data)
 {
        int graph = 0;
+       unsigned long bp;
 
        if (!task)
                task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                        stack = (unsigned long *)task->thread.sp;
        }
 
-#ifdef CONFIG_FRAME_POINTER
-       if (!bp) {
-               if (task == current) {
-                       /* Grab bp right from our regs */
-                       get_bp(bp);
-               } else {
-                       /* bp is the last reg pushed by switch_to */
-                       bp = *(unsigned long *) task->thread.sp;
-               }
-       }
-#endif
-
+       bp = stack_frame(task, regs);
        for (;;) {
                struct thread_info *context;
 
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                  unsigned long *sp, unsigned long bp, char *log_lvl)
+                  unsigned long *sp, char *log_lvl)
 {
        unsigned long *stack;
        int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
                touch_nmi_watchdog();
        }
        printk(KERN_CONT "\n");
-       show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+       show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
                u8 *ip;
 
                printk(KERN_EMERG "Stack:\n");
-               show_stack_log_lvl(NULL, regs, &regs->sp,
-                               0, KERN_EMERG);
+               show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
 
                printk(KERN_EMERG "Code: ");
 
index 6a340485249a965f29686ac84117458d6cb72be6..64101335de19aad09ec03d75dea8260b28051747 100644 (file)
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+               struct pt_regs *regs, unsigned long *stack,
                const struct stacktrace_ops *ops, void *data)
 {
        const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
        unsigned used = 0;
        struct thread_info *tinfo;
        int graph = 0;
+       unsigned long bp;
 
        if (!task)
                task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                        stack = (unsigned long *)task->thread.sp;
        }
 
-#ifdef CONFIG_FRAME_POINTER
-       if (!bp) {
-               if (task == current) {
-                       /* Grab bp right from our regs */
-                       get_bp(bp);
-               } else {
-                       /* bp is the last reg pushed by switch_to */
-                       bp = *(unsigned long *) task->thread.sp;
-               }
-       }
-#endif
-
+       bp = stack_frame(task, regs);
        /*
         * Print function call entries in all stacks, starting at the
         * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                  unsigned long *sp, unsigned long bp, char *log_lvl)
+                  unsigned long *sp, char *log_lvl)
 {
        unsigned long *irq_stack_end;
        unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
        preempt_enable();
 
        printk(KERN_CONT "\n");
-       show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+       show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
 
                printk(KERN_EMERG "Stack:\n");
                show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-                               regs->bp, KERN_EMERG);
+                                  KERN_EMERG);
 
                printk(KERN_EMERG "Code: ");
 
index 1cbd54c0df99189548a3a03f40fbb75a1703475a..5940282bd2f94ed886226bc717c189e593adab50 100644 (file)
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
+       /* This is possible if op is under delayed unoptimizing */
+       if (kprobe_disabled(&op->kp))
+               return;
+
        preempt_disable();
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
        return 0;
 }
 
-/* Replace a breakpoint (int3) with a relative jump.  */
-int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+       u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+                                           u8 *insn_buf,
+                                           struct optimized_kprobe *op)
 {
-       unsigned char jmp_code[RELATIVEJUMP_SIZE];
        s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));
 
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
        memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
               RELATIVE_ADDR_SIZE);
 
-       jmp_code[0] = RELATIVEJUMP_OPCODE;
-       *(s32 *)(&jmp_code[1]) = rel;
+       insn_buf[0] = RELATIVEJUMP_OPCODE;
+       *(s32 *)(&insn_buf[1]) = rel;
+
+       tprm->addr = op->kp.addr;
+       tprm->opcode = insn_buf;
+       tprm->len = RELATIVEJUMP_SIZE;
+}
+
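setup_optimize_kprobe() builds the 5-byte "jmp rel32" into a caller-supplied buffer instead of poking it immediately, so a whole batch can be applied in one text_poke_smp_batch() call further down. The displacement is relative to the end of the jump; a standalone illustration with made-up addresses:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
        uint64_t probe_addr  = 0xffffffff81000100ULL;  /* stands in for op->kp.addr */
        uint64_t detour_addr = 0xffffffffa0002000ULL;  /* stands in for op->optinsn.insn */
        int32_t rel = (int32_t)(detour_addr - (probe_addr + 5));  /* 5 = RELATIVEJUMP_SIZE */
        unsigned char insn[5] = { 0xe9 };               /* RELATIVEJUMP_OPCODE */

        memcpy(&insn[1], &rel, sizeof(rel));
        printf("jmp rel32: %02x %02x %02x %02x %02x\n",
               insn[0], insn[1], insn[2], insn[3], insn[4]);
        return 0;
}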
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * The caller must hold kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+       struct optimized_kprobe *op, *tmp;
+       int c = 0;
+
+       list_for_each_entry_safe(op, tmp, oplist, list) {
+               WARN_ON(kprobe_disabled(&op->kp));
+               /* Setup param */
+               setup_optimize_kprobe(&jump_poke_params[c],
+                                     jump_poke_bufs[c].buf, op);
+               list_del_init(&op->list);
+               if (++c >= MAX_OPTIMIZE_PROBES)
+                       break;
+       }
 
        /*
         * text_poke_smp doesn't support NMI/MCE code modifying.
         * However, since kprobes itself also doesn't support NMI/MCE
         * code probing, it's not a problem.
         */
-       text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
-       return 0;
+       text_poke_smp_batch(jump_poke_params, c);
+}
+
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+                                             u8 *insn_buf,
+                                             struct optimized_kprobe *op)
+{
+       /* Set int3 to first byte for kprobes */
+       insn_buf[0] = BREAKPOINT_INSTRUCTION;
+       memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+       tprm->addr = op->kp.addr;
+       tprm->opcode = insn_buf;
+       tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * The caller must hold kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+                                   struct list_head *done_list)
+{
+       struct optimized_kprobe *op, *tmp;
+       int c = 0;
+
+       list_for_each_entry_safe(op, tmp, oplist, list) {
+               /* Setup param */
+               setup_unoptimize_kprobe(&jump_poke_params[c],
+                                       jump_poke_bufs[c].buf, op);
+               list_move(&op->list, done_list);
+               if (++c >= MAX_OPTIMIZE_PROBES)
+                       break;
+       }
+
+       /*
+        * text_poke_smp doesn't support NMI/MCE code modifying.
+        * However, since kprobes itself also doesn't support NMI/MCE
+        * code probing, it's not a problem.
+        */
+       text_poke_smp_batch(jump_poke_params, c);
 }
 
 /* Replace a relative jump with a breakpoint (int3).  */
@@ -1453,11 +1526,35 @@ static int  __kprobes setup_detour_execution(struct kprobe *p,
        }
        return 0;
 }
+
+static int __kprobes init_poke_params(void)
+{
+       /* Allocate code buffer and parameter array */
+       jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+                                MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+       if (!jump_poke_bufs)
+               return -ENOMEM;
+
+       jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+                                  MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+       if (!jump_poke_params) {
+               kfree(jump_poke_bufs);
+               jump_poke_bufs = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+#else  /* !CONFIG_OPTPROBES */
+static int __kprobes init_poke_params(void)
+{
+       return 0;
+}
 #endif
 
 int __init arch_init_kprobes(void)
 {
-       return 0;
+       return init_poke_params();
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
index 57d1868a86aadc060bc2260b34139809a98ffab5..96ed1aac543a39b773117c7411641a40450bc6c9 100644 (file)
@@ -91,8 +91,7 @@ void exit_thread(void)
 void show_regs(struct pt_regs *regs)
 {
        show_registers(regs);
-       show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
-                  regs->bp);
+       show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
 }
 
 void show_regs_common(void)
index 083e99d1b7df2aba236563467f47ebb21a09943d..68f61ac632e1d814eaa5ab0604505f9b72fc6f80 100644 (file)
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
         */
        smp_store_cpu_info(cpuid);
 
+       /*
+        * This must be done before setting cpu_online_mask
+        * or calling notify_cpu_starting.
+        */
+       set_cpu_sibling_map(raw_smp_processor_id());
+       wmb();
+
        notify_cpu_starting(cpuid);
 
        /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
         */
        check_tsc_sync_target();
 
-       if (nmi_watchdog == NMI_IO_APIC) {
-               legacy_pic->mask(0);
-               enable_NMI_through_LVT0();
-               legacy_pic->unmask(0);
-       }
-
-       /* This must be done before setting cpu_online_mask */
-       set_cpu_sibling_map(raw_smp_processor_id());
-       wmb();
-
        /*
         * We need to hold call_lock, so there is no inconsistency
         * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
                printk(KERN_INFO "SMP mode deactivated.\n");
                smpboot_clear_io_apic();
 
-               localise_nmi_watchdog();
-
                connect_bsp_APIC();
                setup_local_APIC();
                end_local_APIC_setup();
@@ -1196,7 +1191,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 #ifdef CONFIG_X86_IO_APIC
        setup_ioapic_dest();
 #endif
-       check_nmi_watchdog();
        mtrr_aps_init();
 }
 
@@ -1341,8 +1335,6 @@ int native_cpu_disable(void)
        if (cpu == 0)
                return -EBUSY;
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               stop_apic_nmi_watchdog(NULL);
        clear_local_APIC();
 
        cpu_disable_common();
index b53c525368a75cf07489b0327de138bfab5b16d5..938c8e10a19abeae0e2c814e182a0920ff0328f9 100644 (file)
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-       dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
+       dump_trace(current, NULL, NULL, &save_stack_ops, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
-void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
 {
-       dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+       dump_trace(current, regs, NULL, &save_stack_ops, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-       dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
+       dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
index fb5cc5e14cfafb6ed62c4de2929b3bfa0a2e91e9..25a28a245937989d2de9abfb94d96835a1b01615 100644 (file)
 #include <asm/hpet.h>
 #include <asm/time.h>
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-int timer_ack;
-#endif
-
 #ifdef CONFIG_X86_64
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 #endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
        /* Keep nmi watchdog up to date */
        inc_irq_stat(irq0_irqs);
 
-       /* Optimized out for !IO_APIC and x86_64 */
-       if (timer_ack) {
-               /*
-                * Subtle, when I/O APICs are used we have to ack timer IRQ
-                * manually to deassert NMI lines for the watchdog if run
-                * on an 82489DX-based system.
-                */
-               raw_spin_lock(&i8259A_lock);
-               outb(0x0c, PIC_MASTER_OCW3);
-               /* Ack the IRQ; AEOI will end it automatically. */
-               inb(PIC_MASTER_POLL);
-               raw_spin_unlock(&i8259A_lock);
-       }
-
        global_clock_event->event_handler(global_clock_event);
 
        /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
index cb838ca42c9664c2ecf9530d170d27cdbc679f7a..bb6f041673615850aec151c4b51cf6d4ac17a29e 100644 (file)
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+int unknown_nmi_panic;
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
        if (regs->flags & X86_EFLAGS_IF)
@@ -300,6 +302,13 @@ gp_in_kernel:
        die("general protection fault", regs, error_code);
 }
 
+static int __init setup_unknown_nmi_panic(char *str)
+{
+       unknown_nmi_panic = 1;
+       return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
 static notrace __kprobes void
 mem_parity_error(unsigned char reason, struct pt_regs *regs)
 {
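The new __setup() hook wires "unknown_nmi_panic" up as a kernel command line option that sets the flag at boot. For reference, the generic pattern (names hypothetical):

/* sketch: handlers registered with __setup() run while the command line
 * is parsed at boot; returning 1 marks the option as consumed */
static int my_flag;

static int __init parse_my_flag(char *str)
{
        my_flag = 1;
        return 1;
}
__setup("my_flag", parse_my_flag);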
@@ -371,7 +380,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
                        reason, smp_processor_id());
 
        printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-       if (panic_on_unrecovered_nmi)
+       if (unknown_nmi_panic || panic_on_unrecovered_nmi)
                panic("NMI: Not continuing");
 
        printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -397,20 +406,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
                if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
                                                        == NOTIFY_STOP)
                        return;
-
-#ifndef CONFIG_LOCKUP_DETECTOR
-               /*
-                * Ok, so this is none of the documented NMI sources,
-                * so it must be the NMI watchdog.
-                */
-               if (nmi_watchdog_tick(regs, reason))
-                       return;
-               if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-                       unknown_nmi_error(reason, regs);
-#else
-               unknown_nmi_error(reason, regs);
 #endif
+               unknown_nmi_error(reason, regs);
 
                return;
        }
@@ -446,14 +443,12 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 void stop_nmi(void)
 {
-       acpi_nmi_disable();
        ignore_nmis++;
 }
 
 void restart_nmi(void)
 {
        ignore_nmis--;
-       acpi_nmi_enable();
 }
 
 /* May run on IST stack. */
index af3b6c8a436f7b7ec49a2366738a28faa58cdfc5..704a37cedddb59404a3c1fc773e44853b2089939 100644 (file)
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
        e->trace.entries = e->trace_entries;
        e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
        e->trace.skip = 0;
-       save_stack_trace_bp(&e->trace, regs->bp);
+       save_stack_trace_regs(&e->trace, regs);
 
        /* Round address down to nearest 16 bytes */
        shadow_copy = kmemcheck_shadow_lookup(address
index 2d49d4e19a3619c0be2c7d17a892b8aea582048f..72cbec14d783867cb5f5fa8547eb6fcee2fe28b9 100644 (file)
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
        if (!user_mode_vm(regs)) {
                unsigned long stack = kernel_stack_pointer(regs);
                if (depth)
-                       dump_trace(NULL, regs, (unsigned long *)stack, 0,
+                       dump_trace(NULL, regs, (unsigned long *)stack,
                                   &backtrace_ops, &depth);
                return;
        }
index 4e8baad36d37739e32b71da0be2932924bacfe69..358c8b9c96a79c725766e1627544486eb312a0bc 100644 (file)
@@ -732,6 +732,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
                case 0x14:
                        cpu_type = "x86-64/family14h";
                        break;
+               case 0x15:
+                       cpu_type = "x86-64/family15h";
+                       break;
                default:
                        return -ENODEV;
                }
index e3ecb71b5790228073d5eb089f4a4fc9303ab09a..0636dd93cef8d64a718124ddfa7c5a02edd2174f 100644 (file)
@@ -58,9 +58,6 @@ static void timer_stop(void)
 
 int __init op_nmi_timer_init(struct oprofile_operations *ops)
 {
-       if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
-               return -ENODEV;
-
        ops->start = timer_start;
        ops->stop = timer_stop;
        ops->cpu_type = "timer";
index a011bcc0f94331d82c8abfa7d4afdbbd0c59eff5..f2984d43a6b3bbdf9973cf30146721d98fb8467c 100644 (file)
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS 4
+#define NUM_COUNTERS           4
+#define NUM_COUNTERS_F15H      6
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-#define NUM_VIRT_COUNTERS 32
+#define NUM_VIRT_COUNTERS      32
 #else
-#define NUM_VIRT_COUNTERS NUM_COUNTERS
+#define NUM_VIRT_COUNTERS      0
 #endif
 
 #define OP_EVENT_MASK                  0x0FFF
@@ -41,7 +42,8 @@
 
 #define MSR_AMD_EVENTSEL_RESERVED      ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
 
-static unsigned long reset_value[NUM_VIRT_COUNTERS];
+static int num_counters;
+static unsigned long reset_value[OP_MAX_COUNTER];
 
 #define IBS_FETCH_SIZE                 6
 #define IBS_OP_SIZE                    12
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
        int i;
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 {
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!msrs->counters[i].addr)
                        continue;
                release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
 {
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; i++) {
+       for (i = 0; i < num_counters; i++) {
                if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
                        goto fail;
                if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
                        goto fail;
                }
                /* both registers must be reserved */
-               msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
-               msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+               if (num_counters == NUM_COUNTERS_F15H) {
+                       msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
+                       msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
+               } else {
+                       msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+                       msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+               }
                continue;
        fail:
                if (!counter_config[i].enabled)
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        int i;
 
        /* setup reset_value */
-       for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
+       for (i = 0; i < OP_MAX_COUNTER; ++i) {
                if (counter_config[i].enabled
                    && msrs->counters[op_x86_virt_to_phys(i)].addr)
                        reset_value[i] = counter_config[i].count;
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        /* clear all counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!msrs->controls[i].addr)
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
        u64 val;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
        u64 val;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
         * Subtle: stop on all counters to avoid race with setting our
         * pm callback
         */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -698,18 +705,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
        return 0;
 }
 
+struct op_x86_model_spec op_amd_spec;
+
 static int op_amd_init(struct oprofile_operations *ops)
 {
        init_ibs();
        create_arch_files = ops->create_files;
        ops->create_files = setup_ibs_files;
+
+       if (boot_cpu_data.x86 == 0x15) {
+               num_counters = NUM_COUNTERS_F15H;
+       } else {
+               num_counters = NUM_COUNTERS;
+       }
+
+       op_amd_spec.num_counters = num_counters;
+       op_amd_spec.num_controls = num_counters;
+       op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
+
        return 0;
 }
 
 struct op_x86_model_spec op_amd_spec = {
-       .num_counters           = NUM_COUNTERS,
-       .num_controls           = NUM_COUNTERS,
-       .num_virt_counters      = NUM_VIRT_COUNTERS,
+       /* num_counters/num_controls filled in at runtime */
        .reserved               = MSR_AMD_EVENTSEL_RESERVED,
        .event_mask             = OP_EVENT_MASK,
        .init                   = op_amd_init,
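op_amd_init() now sizes the counter arrays at runtime: six counters on family 15h, four otherwise, with family 15h addressed through the interleaved MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR pairs seen in the earlier hunk. A standalone sketch of that layout (the base addresses follow msr-index.h and are an assumption here):

#include <stdio.h>

int main(void)
{
        unsigned int ctl_base = 0xc0010200;     /* MSR_F15H_PERF_CTL */
        unsigned int ctr_base = 0xc0010201;     /* MSR_F15H_PERF_CTR */
        int i;

        for (i = 0; i < 6; i++)                 /* NUM_COUNTERS_F15H */
                printf("counter %d: ctl=%#x ctr=%#x\n",
                       i, ctl_base + (i << 1), ctr_base + (i << 1));
        return 0;
}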
index 182558dd5515add420a27dfa58304d04b6a6f71a..9fadec074142b11afcb39e73627dc4c4fd8e14dd 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/oprofile.h>
 #include <linux/smp.h>
 #include <linux/ptrace.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
index 660a2728908d6e15f8abbe5b2eae43b4a91df503..0cac7ec0d2ece0764806fae7a11994da0520ab23 100644 (file)
@@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
         * as possible (without an NMI being received in the middle of
         * this) - so disable NMIs and initialize the device:
         */
-       acpi_nmi_disable();
        status = acpi_ns_evaluate(info);
-       acpi_nmi_enable();
 
        if (ACPI_SUCCESS(status)) {
                walk_info->num_INI++;
index 3d77116e463410dac81b6c530c74d5a55f9c7afc..dea7b5bf6e2ccd986cf99840db504d6a38382293 100644 (file)
@@ -642,19 +642,14 @@ static struct notifier_block die_notifier = {
  */
 
 #ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 {
        /*
         * If nmi_watchdog is turned off then we can turn on
         * our nmi decoding capability.
         */
-       if (!nmi_watchdog_active())
-               hpwdt_nmi_decoding = 1;
-       else
-               dev_warn(&dev->dev, "NMI decoding is disabled. To enable this "
-                       "functionality you must reboot with nmi_watchdog=0 "
-                       "and load the hpwdt driver with priority=1.\n");
+       hpwdt_nmi_decoding = 1;
 }
 #else
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
@@ -662,7 +657,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
        dev_warn(&dev->dev, "NMI decoding is disabled. "
                "Your kernel does not support a NMI Watchdog.\n");
 }
-#endif /* ARCH_HAS_NMI_WATCHDOG */
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
index 8beabb958f61d5147c8893f1e780415a91fcb2e6..47e3997f7b5cf39233283ff43d84937daa502c2f 100644 (file)
@@ -154,12 +154,14 @@ enum {
        TRACE_EVENT_FL_ENABLED_BIT,
        TRACE_EVENT_FL_FILTERED_BIT,
        TRACE_EVENT_FL_RECORDED_CMD_BIT,
+       TRACE_EVENT_FL_CAP_ANY_BIT,
 };
 
 enum {
        TRACE_EVENT_FL_ENABLED          = (1 << TRACE_EVENT_FL_ENABLED_BIT),
        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
        TRACE_EVENT_FL_RECORDED_CMD     = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
+       TRACE_EVENT_FL_CAP_ANY          = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 };
 
 struct ftrace_event_call {
@@ -196,6 +198,14 @@ struct ftrace_event_call {
 #endif
 };
 
+#define __TRACE_EVENT_FLAGS(name, value)                               \
+       static int __init trace_init_flags_##name(void)                 \
+       {                                                               \
+               event_##name.flags = value;                             \
+               return 0;                                               \
+       }                                                               \
+       early_initcall(trace_init_flags_##name);
+
 #define PERF_MAX_TRACE_SIZE    2048
 
 #define MAX_FILTER_PRED                32
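
As a worked illustration of the new helper: TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY) in include/trace/events/syscalls.h (further down in this diff) is routed through __TRACE_EVENT_FLAGS() and expands to roughly the early initcall below; event_sys_enter is the ftrace_event_call generated for that tracepoint.

static int __init trace_init_flags_sys_enter(void)
{
	/* Runs as an early initcall and stamps the flag onto the event. */
	event_sys_enter.flags = TRACE_EVENT_FL_CAP_ANY;
	return 0;
}
early_initcall(trace_init_flags_sys_enter);
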
@@ -215,6 +225,10 @@ enum {
        FILTER_PTR_STRING,
 };
 
+#define EVENT_STORAGE_SIZE 128
+extern struct mutex event_storage_mutex;
+extern char event_storage[EVENT_STORAGE_SIZE];
+
 extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
                              const char *name, int offset, int size,
index e7d1b2e0070d3570b7022877a79fa2f0ed081507..b78edb58ee66164e756b4789baf71ab86e8684c4 100644 (file)
@@ -275,7 +275,9 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
 extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
 extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
 extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
-extern int  arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_optimize_kprobes(struct list_head *oplist);
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+                                   struct list_head *done_list);
 extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
 extern kprobe_opcode_t *get_optinsn_slot(void);
 extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
index 06aab5eee134cd56c4bade9005912fa3a785a327..c536f8545f74c11e345943187f201bfc25e48baa 100644 (file)
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 #include <asm/nmi.h>
 extern void touch_nmi_watchdog(void);
-extern void acpi_nmi_disable(void);
-extern void acpi_nmi_enable(void);
 #else
-#ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
        touch_softlockup_watchdog();
 }
-#else
-extern void touch_nmi_watchdog(void);
-#endif
-static inline void acpi_nmi_disable(void) { }
-static inline void acpi_nmi_enable(void) { }
 #endif
 
 /*
index 4f1279e105ee143e4317219b3cb093bc8bbdd954..dda5b0a3ff6014b8a0741a186ed0e3968b63d298 100644 (file)
@@ -215,8 +215,9 @@ struct perf_event_attr {
                                 */
                                precise_ip     :  2, /* skid constraint       */
                                mmap_data      :  1, /* non-exec mmap data    */
+                               sample_id_all  :  1, /* sample_type all events */
 
-                               __reserved_1   : 46;
+                               __reserved_1   : 45;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
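
For orientation, the trailing identity block that sample_id_all adds to non-sample records looks roughly like the sketch below; the field order mirrors __perf_event__output_id_sample() later in this series, while the struct name itself is only illustrative.

/* Illustrative layout only: appended after a record's existing payload when
 * perf_event_attr.sample_id_all == 1, each member present only if the
 * corresponding bit is set in attr.sample_type. */
struct sample_id_tail {
	__u32	pid, tid;	/* if PERF_SAMPLE_TID */
	__u64	time;		/* if PERF_SAMPLE_TIME */
	__u64	id;		/* if PERF_SAMPLE_ID */
	__u64	stream_id;	/* if PERF_SAMPLE_STREAM_ID */
	__u32	cpu, res;	/* if PERF_SAMPLE_CPU */
};
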
@@ -327,6 +328,15 @@ struct perf_event_header {
 enum perf_event_type {
 
        /*
+        * If perf_event_attr.sample_id_all is set then all event types will
+        * have the sample_type-selected identity fields describing where/when
+        * the event took place (TID, TIME, ID, CPU, STREAM_ID), as described
+        * for PERF_RECORD_SAMPLE below. They are stashed just after the
+        * perf_event_header and the record's existing fields, i.e. at the end
+        * of the payload. That way a newer perf.data file will still be
+        * readable by older perf tools, which simply ignore the new optional
+        * fields.
+        *
         * The MMAP events record the PROT_EXEC mappings so that we can
         * correlate userspace IPs to code. They have the following structure:
         *
@@ -578,6 +588,10 @@ struct perf_event;
 struct pmu {
        struct list_head                entry;
 
+       struct device                   *dev;
+       char                            *name;
+       int                             type;
+
        int * __percpu                  pmu_disable_count;
        struct perf_cpu_context * __percpu pmu_cpu_context;
        int                             task_ctx_nr;
@@ -758,6 +772,9 @@ struct perf_event {
        u64                             shadow_ctx_time;
 
        struct perf_event_attr          attr;
+       u16                             header_size;
+       u16                             id_header_size;
+       u16                             read_size;
        struct hw_perf_event            hw;
 
        struct perf_event_context       *ctx;
@@ -903,7 +920,7 @@ struct perf_output_handle {
 
 #ifdef CONFIG_PERF_EVENTS
 
-extern int perf_pmu_register(struct pmu *pmu);
+extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
 
 extern int perf_num_counters(void);
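
With the extra arguments, callers now name their PMU and either claim a fixed type or ask for a dynamically allocated one; a hedged sketch follows (the breakpoint call appears verbatim later in this diff, the second call is hypothetical).

/* Fixed, well-known type (as kernel/hw_breakpoint.c does below): */
perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

/* Hypothetical driver PMU: a negative type requests a dynamically
 * allocated id (backed by the idr support pulled into kernel/perf_event.c). */
perf_pmu_register(&my_driver_pmu, "my_driver", -1);
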
@@ -970,6 +987,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
                                 struct perf_sample_data *data,
                                 struct pt_regs *regs);
 
+static inline bool is_sampling_event(struct perf_event *event)
+{
+       return event->attr.sample_period != 0;
+}
+
 /*
  * Return 1 for a software event, 0 for a hardware event
  */
index 223874538b33208e3c5ff11710f3161d58b4aef2..a99d735db3dfe5ee26fba4aebc5261dedfa74af9 100644 (file)
@@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
                                  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
+void lockup_detector_init(void);
 #else
 static inline void touch_softlockup_watchdog(void)
 {
@@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
+static inline void lockup_detector_init(void)
+{
+}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
index 51efbef38fb0e204cfddb61b56619d52cefab623..25310f1d7f3773c540e51e7103a4edfa98db7a33 100644 (file)
@@ -2,6 +2,7 @@
 #define __LINUX_STACKTRACE_H
 
 struct task_struct;
+struct pt_regs;
 
 #ifdef CONFIG_STACKTRACE
 struct task_struct;
@@ -13,7 +14,8 @@ struct stack_trace {
 };
 
 extern void save_stack_trace(struct stack_trace *trace);
-extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp);
+extern void save_stack_trace_regs(struct stack_trace *trace,
+                                 struct pt_regs *regs);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
                                struct stack_trace *trace);
 
index cacc27a0e285163d9a8727a4131ffed478b8f46c..18cd0684fc4ec4bb2e6fb52ed6a7838737688c17 100644 (file)
@@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 #define SYSCALL_TRACE_ENTER_EVENT(sname)                               \
        static struct syscall_metadata                                  \
        __attribute__((__aligned__(4))) __syscall_meta_##sname;         \
-       static struct ftrace_event_call                                 \
-       __attribute__((__aligned__(4))) event_enter_##sname;            \
        static struct ftrace_event_call __used                          \
          __attribute__((__aligned__(4)))                               \
          __attribute__((section("_ftrace_events")))                    \
@@ -137,13 +135,12 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .class                  = &event_class_syscall_enter,   \
                .event.funcs            = &enter_syscall_print_funcs,   \
                .data                   = (void *)&__syscall_meta_##sname,\
-       }
+       };                                                              \
+       __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)                                        \
        static struct syscall_metadata                                  \
        __attribute__((__aligned__(4))) __syscall_meta_##sname;         \
-       static struct ftrace_event_call                                 \
-       __attribute__((__aligned__(4))) event_exit_##sname;             \
        static struct ftrace_event_call __used                          \
          __attribute__((__aligned__(4)))                               \
          __attribute__((section("_ftrace_events")))                    \
@@ -152,7 +149,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .class                  = &event_class_syscall_exit,    \
                .event.funcs            = &exit_syscall_print_funcs,    \
                .data                   = (void *)&__syscall_meta_##sname,\
-       }
+       };                                                              \
+       __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_METADATA(sname, nb)                            \
        SYSCALL_TRACE_ENTER_EVENT(sname);                       \
index a4a90b6726ce6129b43174609fb3e35a2bd088ae..d3e4f87e95c0fa67236f92c2a688fdaa640cfaae 100644 (file)
@@ -106,6 +106,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 
 #define TP_PROTO(args...)      args
 #define TP_ARGS(args...)       args
+#define TP_CONDITION(args...)  args
 
 #ifdef CONFIG_TRACEPOINTS
 
@@ -119,12 +120,14 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args)                                    \
+#define __DO_TRACE(tp, proto, args, cond)                              \
        do {                                                            \
                struct tracepoint_func *it_func_ptr;                    \
                void *it_func;                                          \
                void *__data;                                           \
                                                                        \
+               if (!(cond))                                            \
+                       return;                                         \
                rcu_read_lock_sched_notrace();                          \
                it_func_ptr = rcu_dereference_sched((tp)->funcs);       \
                if (it_func_ptr) {                                      \
@@ -142,7 +145,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)      \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)        \
        extern struct tracepoint __tracepoint_##name;                   \
        static inline void trace_##name(proto)                          \
        {                                                               \
@@ -151,7 +154,8 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 do_trace:                                                              \
                        __DO_TRACE(&__tracepoint_##name,                \
                                TP_PROTO(data_proto),                   \
-                               TP_ARGS(data_args));                    \
+                               TP_ARGS(data_args),                     \
+                               TP_CONDITION(cond));                    \
        }                                                               \
        static inline int                                               \
        register_trace_##name(void (*probe)(data_proto), void *data)    \
@@ -186,7 +190,7 @@ do_trace:                                                           \
        EXPORT_SYMBOL(__tracepoint_##name)
 
 #else /* !CONFIG_TRACEPOINTS */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)      \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)        \
        static inline void trace_##name(proto)                          \
        { }                                                             \
        static inline int                                               \
@@ -227,13 +231,20 @@ do_trace:                                                         \
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)                                     \
-               __DECLARE_TRACE(name, void, , void *__data, __data)
+               __DECLARE_TRACE(name, void, , 1, void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)                               \
-               __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),      \
+               __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,   \
                                PARAMS(void *__data, proto),            \
                                PARAMS(__data, args))
 
+#define DECLARE_TRACE_CONDITION(name, proto, args, cond)               \
+       __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+                       PARAMS(void *__data, proto),                    \
+                       PARAMS(__data, args))
+
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #endif /* DECLARE_TRACE */
 
 #ifndef TRACE_EVENT
@@ -347,11 +358,21 @@ do_trace:                                                         \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_CONDITION(template, name, proto,          \
+                              args, cond)                      \
+       DECLARE_TRACE_CONDITION(name, PARAMS(proto),            \
+                               PARAMS(args), PARAMS(cond))
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)  \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,              \
                assign, print, reg, unreg)                      \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT_CONDITION(name, proto, args, cond,         \
+                             struct, assign, print)            \
+       DECLARE_TRACE_CONDITION(name, PARAMS(proto),            \
+                               PARAMS(args), PARAMS(cond))
+
+#define TRACE_EVENT_FLAGS(event, flag)
 
 #endif /* ifdef TRACE_EVENT (see note above) */
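
A hypothetical definition site, to show how the new conditional variants read (the event name, field and condition below are made up for illustration and are not part of this commit); the condition is evaluated in trace_##name() before the RCU-protected callback walk is entered.

TRACE_EVENT_CONDITION(sample_io_error,

	TP_PROTO(int error),

	TP_ARGS(error),

	/* Probes only fire when the condition evaluates true. */
	TP_CONDITION(error != 0),

	TP_STRUCT__entry(
		__field(int, error)
	),

	TP_fast_assign(
		__entry->error = error;
	),

	TP_printk("error=%d", __entry->error)
);
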
index 1dfab54015113b83bce9f3302470c3a5ed95b5e7..b0b4eb24d592fb1f8ecba11294c10e802ff7cd2b 100644 (file)
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
        DEFINE_TRACE(name)
 
+#undef TRACE_EVENT_CONDITION
+#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
+       TRACE_EVENT(name,                                               \
+               PARAMS(proto),                                          \
+               PARAMS(args),                                           \
+               PARAMS(tstruct),                                        \
+               PARAMS(assign),                                         \
+               PARAMS(print))
+
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,             \
                assign, print, reg, unreg)                      \
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_TRACE(name)
 
+#undef DEFINE_EVENT_CONDITION
+#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)       \
        DEFINE_TRACE(name)
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef TRACE_EVENT_CONDITION
 #undef DECLARE_EVENT_CLASS
 #undef DEFINE_EVENT
 #undef DEFINE_EVENT_PRINT
+#undef DEFINE_EVENT_CONDITION
 #undef TRACE_HEADER_MULTI_READ
 #undef DECLARE_TRACE
 
index fb726ac7caee4f465033ff5d314d788db929ded5..5a4c04a75b3d369fc9665eca1deee12a2d442d61 100644 (file)
@@ -40,6 +40,8 @@ TRACE_EVENT_FN(sys_enter,
        syscall_regfunc, syscall_unregfunc
 );
 
+TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
+
 TRACE_EVENT_FN(sys_exit,
 
        TP_PROTO(struct pt_regs *regs, long ret),
@@ -62,6 +64,8 @@ TRACE_EVENT_FN(sys_exit,
        syscall_regfunc, syscall_unregfunc
 );
 
+TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY)
+
 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
 
 #endif /* _TRACE_EVENTS_SYSCALLS_H */
index a9377c0083ad3ed612547f783647132a8268ef09..e16610c208c954541587684c8af64584b01dbfda 100644 (file)
        TRACE_EVENT(name, PARAMS(proto), PARAMS(args),                  \
                PARAMS(tstruct), PARAMS(assign), PARAMS(print))         \
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(name, value)                                 \
+       __TRACE_EVENT_FLAGS(name, value)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -289,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {    \
 
 #undef __array
 #define __array(type, item, len)                                       \
-       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
-       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+       do {                                                            \
+               mutex_lock(&event_storage_mutex);                       \
+               BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                 \
+               snprintf(event_storage, sizeof(event_storage),          \
+                        "%s[%d]", #type, len);                         \
+               ret = trace_define_field(event_call, event_storage, #item, \
                                 offsetof(typeof(field), item),         \
                                 sizeof(field.item),                    \
                                 is_signed_type(type), FILTER_OTHER);   \
-       if (ret)                                                        \
-               return ret;
+               mutex_unlock(&event_storage_mutex);                     \
+               if (ret)                                                \
+                       return ret;                                     \
+       } while (0);
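
The net effect of routing __array()'s type string through snprintf() rather than plain stringification, sketched under the assumption of an array length given as a macro (illustrative, not taken from this commit):

/* Old:  #type "[" #len "]"                         -> "char[TASK_COMM_LEN]"
 * New:  snprintf(event_storage, ..., "%s[%d]", #type, len)
 *                                                  -> "char[16]"
 * event_storage is a single shared buffer, hence event_storage_mutex
 * serializing concurrent event registration. */
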
 
 #undef __dynamic_array
 #define __dynamic_array(type, item, len)                                      \
index 8646401f7a0e4b77579aa13f8de6ac191787be73..ea51770c01701e312f70f9a81357babbf93fb865 100644 (file)
@@ -67,6 +67,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
+#include <linux/perf_event.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -603,6 +604,8 @@ asmlinkage void __init start_kernel(void)
                                "enabled *very* early, fixing it\n");
                local_irq_disable();
        }
+       idr_init_cache();
+       perf_event_init();
        rcu_init();
        radix_tree_init();
        /* init some links before init_ISA_irqs() */
@@ -658,7 +661,6 @@ asmlinkage void __init start_kernel(void)
        enable_debug_pagealloc();
        kmemleak_init();
        debug_objects_mem_init();
-       idr_init_cache();
        setup_per_cpu_pageset();
        numa_policy_init();
        if (late_time_init)
@@ -882,6 +884,7 @@ static int __init kernel_init(void * unused)
        smp_prepare_cpus(setup_max_cpus);
 
        do_pre_smp_initcalls();
+       lockup_detector_init();
 
        smp_init();
        sched_init_smp();
index e5325825aeb6e1e4ea0514ee37cfa53412ec4e3c..086adf25a55e3aaecf3eb3172a7569b2c1a209e0 100644 (file)
@@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void)
 
        constraints_initialized = 1;
 
-       perf_pmu_register(&perf_breakpoint);
+       perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
 
        return register_die_notifier(&hw_breakpoint_exceptions_nb);
 
index 9737a76e106ff1554ecc2174f0e49a92b5badf45..7663e5df0e6f731f1804201a5e6cdf9b6162dd05 100644 (file)
@@ -354,13 +354,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
        return p->pre_handler == aggr_pre_handler;
 }
 
+/* Return true(!0) if the kprobe is unused */
+static inline int kprobe_unused(struct kprobe *p)
+{
+       return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
+              list_empty(&p->list);
+}
+
 /*
  * Keep all fields in the kprobe consistent
  */
-static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
 {
-       memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
-       memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+       memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
+       memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
 }
 
 #ifdef CONFIG_OPTPROBES
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
        }
 }
 
+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       op = container_of(p, struct optimized_kprobe, kp);
+       arch_remove_optimized_kprobe(op);
+       arch_remove_kprobe(p);
+       kfree(op);
+}
+
 /* Return true(!0) if the kprobe is ready for optimization. */
 static inline int kprobe_optready(struct kprobe *p)
 {
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
        return 0;
 }
 
+/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
+static inline int kprobe_disarmed(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
+       if (!kprobe_aggrprobe(p))
+               return kprobe_disabled(p);
+
+       op = container_of(p, struct optimized_kprobe, kp);
+
+       return kprobe_disabled(p) && list_empty(&op->list);
+}
+
+/* Return true(!0) if the probe is queued on (un)optimizing lists */
+static int __kprobes kprobe_queued(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       if (kprobe_aggrprobe(p)) {
+               op = container_of(p, struct optimized_kprobe, kp);
+               if (!list_empty(&op->list))
+                       return 1;
+       }
+       return 0;
+}
+
 /*
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including addr (exclude breakpoint).
@@ -422,30 +467,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
 
 /* Optimization staging list, protected by kprobe_mutex */
 static LIST_HEAD(optimizing_list);
+static LIST_HEAD(unoptimizing_list);
 
 static void kprobe_optimizer(struct work_struct *work);
 static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+static DECLARE_COMPLETION(optimizer_comp);
 #define OPTIMIZE_DELAY 5
 
-/* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+/*
+ * Optimize (replace a breakpoint with a jump) kprobes listed on
+ * optimizing_list.
+ */
+static __kprobes void do_optimize_kprobes(void)
 {
-       struct optimized_kprobe *op, *tmp;
-
-       /* Lock modules while optimizing kprobes */
-       mutex_lock(&module_mutex);
-       mutex_lock(&kprobe_mutex);
-       if (kprobes_all_disarmed || !kprobes_allow_optimization)
-               goto end;
-
-       /*
-        * Wait for quiesence period to ensure all running interrupts
-        * are done. Because optprobe may modify multiple instructions
-        * there is a chance that Nth instruction is interrupted. In that
-        * case, running interrupt can return to 2nd-Nth byte of jump
-        * instruction. This wait is for avoiding it.
-        */
-       synchronize_sched();
+       /* Optimization is never done while kprobes are disarmed */
+       if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+           list_empty(&optimizing_list))
+               return;
 
        /*
         * The optimization/unoptimization refers online_cpus via
@@ -459,17 +497,111 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
         */
        get_online_cpus();
        mutex_lock(&text_mutex);
-       list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
-               WARN_ON(kprobe_disabled(&op->kp));
-               if (arch_optimize_kprobe(op) < 0)
-                       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-               list_del_init(&op->list);
+       arch_optimize_kprobes(&optimizing_list);
+       mutex_unlock(&text_mutex);
+       put_online_cpus();
+}
+
+/*
+ * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
+ * if needed) kprobes listed on unoptimizing_list.
+ */
+static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
+{
+       struct optimized_kprobe *op, *tmp;
+
+       /* Unlike optimization, unoptimization must always be carried out */
+       if (list_empty(&unoptimizing_list))
+               return;
+
+       /* Ditto to do_optimize_kprobes */
+       get_online_cpus();
+       mutex_lock(&text_mutex);
+       arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+       /* Loop free_list for disarming */
+       list_for_each_entry_safe(op, tmp, free_list, list) {
+               /* Disarm probes if marked disabled */
+               if (kprobe_disabled(&op->kp))
+                       arch_disarm_kprobe(&op->kp);
+               if (kprobe_unused(&op->kp)) {
+                       /*
+                        * Remove unused probes from hash list. After waiting
+                        * for synchronization, these probes are reclaimed.
+                        * (reclaiming is done by do_free_cleaned_kprobes.)
+                        */
+                       hlist_del_rcu(&op->kp.hlist);
+               } else
+                       list_del_init(&op->list);
        }
        mutex_unlock(&text_mutex);
        put_online_cpus();
-end:
+}
+
+/* Reclaim all kprobes on the free_list */
+static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
+{
+       struct optimized_kprobe *op, *tmp;
+
+       list_for_each_entry_safe(op, tmp, free_list, list) {
+               BUG_ON(!kprobe_unused(&op->kp));
+               list_del_init(&op->list);
+               free_aggr_kprobe(&op->kp);
+       }
+}
+
+/* Start optimizer after OPTIMIZE_DELAY passed */
+static __kprobes void kick_kprobe_optimizer(void)
+{
+       if (!delayed_work_pending(&optimizing_work))
+               schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+       LIST_HEAD(free_list);
+
+       /* Lock modules while optimizing kprobes */
+       mutex_lock(&module_mutex);
+       mutex_lock(&kprobe_mutex);
+
+       /*
+        * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
+        * kprobes before waiting for the quiescence period.
+        */
+       do_unoptimize_kprobes(&free_list);
+
+       /*
+        * Step 2: Wait for the quiescence period to ensure all running
+        * interrupts are done. Because an optprobe may modify multiple
+        * instructions, there is a chance that the Nth instruction is
+        * interrupted. In that case, a running interrupt can return into
+        * the 2nd-Nth byte of the jump instruction. This wait avoids that.
+        */
+       synchronize_sched();
+
+       /* Step 3: Optimize kprobes after the quiescence period */
+       do_optimize_kprobes();
+
+       /* Step 4: Free cleaned kprobes after the quiescence period */
+       do_free_cleaned_kprobes(&free_list);
+
        mutex_unlock(&kprobe_mutex);
        mutex_unlock(&module_mutex);
+
+       /* Step 5: Kick optimizer again if needed */
+       if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
+               kick_kprobe_optimizer();
+       else
+               /* Wake up all waiters */
+               complete_all(&optimizer_comp);
+}
+
+/* Wait for optimization and unoptimization to complete */
+static __kprobes void wait_for_kprobe_optimizer(void)
+{
+       if (delayed_work_pending(&optimizing_work))
+               wait_for_completion(&optimizer_comp);
 }
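
For readers skimming the hunks above, a compact pseudocode restatement of the new batched flow (not code from the commit):

/* optimize_kprobe(p):   queue p on optimizing_list,   kick_kprobe_optimizer()
 * unoptimize_kprobe(p): queue p on unoptimizing_list, kick_kprobe_optimizer()
 *
 * kprobe_optimizer() worker, OPTIMIZE_DELAY ticks later:
 *   do_unoptimize_kprobes(&free_list);    jumps -> breakpoints, collect unused probes
 *   synchronize_sched();                  wait out in-flight interrupts
 *   do_optimize_kprobes();                breakpoints -> jumps
 *   do_free_cleaned_kprobes(&free_list);  reclaim the unused aggrprobes
 *   then reschedule itself while either list is non-empty, else complete_all(). */
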
 
 /* Optimize kprobe if p is ready to be optimized */
@@ -495,42 +627,99 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
        /* Check if it is already optimized. */
        if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
                return;
-
        op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
-       list_add(&op->list, &optimizing_list);
-       if (!delayed_work_pending(&optimizing_work))
-               schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+
+       if (!list_empty(&op->list))
+               /* This probe is being unoptimized. Just dequeue it */
+               list_del_init(&op->list);
+       else {
+               list_add(&op->list, &optimizing_list);
+               kick_kprobe_optimizer();
+       }
+}
+
+/* Short cut to direct unoptimizing */
+static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+       get_online_cpus();
+       arch_unoptimize_kprobe(op);
+       put_online_cpus();
+       if (kprobe_disabled(&op->kp))
+               arch_disarm_kprobe(&op->kp);
 }
 
 /* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p)
+static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
 {
        struct optimized_kprobe *op;
 
-       if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
-               op = container_of(p, struct optimized_kprobe, kp);
-               if (!list_empty(&op->list))
-                       /* Dequeue from the optimization queue */
+       if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
+               return; /* This is not an optprobe nor optimized */
+
+       op = container_of(p, struct optimized_kprobe, kp);
+       if (!kprobe_optimized(p)) {
+               /* Unoptimized or unoptimizing case */
+               if (force && !list_empty(&op->list)) {
+                       /*
+                        * Only if this probe is being unoptimized and force is
+                        * set, unoptimize it right away. (There is no need to
+                        * unoptimize an already-unoptimized kprobe again.)
+                        */
                        list_del_init(&op->list);
-               else
-                       /* Replace jump with break */
-                       arch_unoptimize_kprobe(op);
-               op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+                       force_unoptimize_kprobe(op);
+               }
+               return;
+       }
+
+       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+       if (!list_empty(&op->list)) {
+               /* Dequeue from the optimization queue */
+               list_del_init(&op->list);
+               return;
+       }
+       /* Optimized kprobe case */
+       if (force)
+               /* Forcibly update the code: this is a special case */
+               force_unoptimize_kprobe(op);
+       else {
+               list_add(&op->list, &unoptimizing_list);
+               kick_kprobe_optimizer();
        }
 }
 
+/* Cancel unoptimizing for reusing */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+       struct optimized_kprobe *op;
+
+       BUG_ON(!kprobe_unused(ap));
+       /*
+        * An unused kprobe MUST be in the middle of delayed unoptimizing
+        * (i.e. the relative jump is still in place) and disabled.
+        */
+       op = container_of(ap, struct optimized_kprobe, kp);
+       if (unlikely(list_empty(&op->list)))
+               printk(KERN_WARNING "Warning: found a stray unused "
+                       "aggrprobe@%p\n", ap->addr);
+       /* Enable the probe again */
+       ap->flags &= ~KPROBE_FLAG_DISABLED;
+       /* Optimize it again (remove from op->list) */
+       BUG_ON(!kprobe_optready(ap));
+       optimize_kprobe(ap);
+}
+
 /* Remove optimized instructions */
 static void __kprobes kill_optimized_kprobe(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
        op = container_of(p, struct optimized_kprobe, kp);
-       if (!list_empty(&op->list)) {
-               /* Dequeue from the optimization queue */
+       if (!list_empty(&op->list))
+               /* Dequeue from the (un)optimization queue */
                list_del_init(&op->list);
-               op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-       }
-       /* Don't unoptimize, because the target code will be freed. */
+
+       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+       /* Don't touch the code, because it is already freed. */
        arch_remove_optimized_kprobe(op);
 }
 
@@ -543,16 +732,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
        arch_prepare_optimized_kprobe(op);
 }
 
-/* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
-{
-       struct optimized_kprobe *op;
-
-       op = container_of(p, struct optimized_kprobe, kp);
-       arch_remove_optimized_kprobe(op);
-       kfree(op);
-}
-
 /* Allocate new optimized_kprobe and try to prepare optimized instructions */
 static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
@@ -587,7 +766,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
        op = container_of(ap, struct optimized_kprobe, kp);
        if (!arch_prepared_optinsn(&op->optinsn)) {
                /* If failed to setup optimizing, fallback to kprobe */
-               free_aggr_kprobe(ap);
+               arch_remove_optimized_kprobe(op);
+               kfree(op);
                return;
        }
 
@@ -631,21 +811,16 @@ static void __kprobes unoptimize_all_kprobes(void)
                return;
 
        kprobes_allow_optimization = false;
-       printk(KERN_INFO "Kprobes globally unoptimized\n");
-       get_online_cpus();      /* For avoiding text_mutex deadlock */
-       mutex_lock(&text_mutex);
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist) {
                        if (!kprobe_disabled(p))
-                               unoptimize_kprobe(p);
+                               unoptimize_kprobe(p, false);
                }
        }
-
-       mutex_unlock(&text_mutex);
-       put_online_cpus();
-       /* Allow all currently running kprobes to complete */
-       synchronize_sched();
+       /* Wait for unoptimizing completion */
+       wait_for_kprobe_optimizer();
+       printk(KERN_INFO "Kprobes globally unoptimized\n");
 }
 
 int sysctl_kprobes_optimization;
@@ -669,44 +844,60 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 }
 #endif /* CONFIG_SYSCTL */
 
+/* Put a breakpoint for a probe. Must be called with text_mutex locked */
 static void __kprobes __arm_kprobe(struct kprobe *p)
 {
-       struct kprobe *old_p;
+       struct kprobe *_p;
 
        /* Check collision with other optimized kprobes */
-       old_p = get_optimized_kprobe((unsigned long)p->addr);
-       if (unlikely(old_p))
-               unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+       _p = get_optimized_kprobe((unsigned long)p->addr);
+       if (unlikely(_p))
+               /* Fallback to unoptimized kprobe */
+               unoptimize_kprobe(_p, true);
 
        arch_arm_kprobe(p);
        optimize_kprobe(p);     /* Try to optimize (add kprobe to a list) */
 }
 
-static void __kprobes __disarm_kprobe(struct kprobe *p)
+/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
+static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
 {
-       struct kprobe *old_p;
+       struct kprobe *_p;
 
-       unoptimize_kprobe(p);   /* Try to unoptimize */
-       arch_disarm_kprobe(p);
+       unoptimize_kprobe(p, false);    /* Try to unoptimize */
 
-       /* If another kprobe was blocked, optimize it. */
-       old_p = get_optimized_kprobe((unsigned long)p->addr);
-       if (unlikely(old_p))
-               optimize_kprobe(old_p);
+       if (!kprobe_queued(p)) {
+               arch_disarm_kprobe(p);
+               /* If another kprobe was blocked, optimize it. */
+               _p = get_optimized_kprobe((unsigned long)p->addr);
+               if (unlikely(_p) && reopt)
+                       optimize_kprobe(_p);
+       }
+       /* TODO: reoptimize others after unoptimizing this probe */
 }
 
 #else /* !CONFIG_OPTPROBES */
 
 #define optimize_kprobe(p)                     do {} while (0)
-#define unoptimize_kprobe(p)                   do {} while (0)
+#define unoptimize_kprobe(p, f)                        do {} while (0)
 #define kill_optimized_kprobe(p)               do {} while (0)
 #define prepare_optimized_kprobe(p)            do {} while (0)
 #define try_to_optimize_kprobe(p)              do {} while (0)
 #define __arm_kprobe(p)                                arch_arm_kprobe(p)
-#define __disarm_kprobe(p)                     arch_disarm_kprobe(p)
+#define __disarm_kprobe(p, o)                  arch_disarm_kprobe(p)
+#define kprobe_disarmed(p)                     kprobe_disabled(p)
+#define wait_for_kprobe_optimizer()            do {} while (0)
+
+/* Without optimization support there should be no unused kprobe to reuse */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+       printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+       BUG_ON(kprobe_unused(ap));
+}
 
 static __kprobes void free_aggr_kprobe(struct kprobe *p)
 {
+       arch_remove_kprobe(p);
        kfree(p);
 }
 
@@ -732,11 +923,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
 /* Disarm a kprobe with text_mutex */
 static void __kprobes disarm_kprobe(struct kprobe *kp)
 {
-       get_online_cpus();      /* For avoiding text_mutex deadlock */
+       /* Ditto */
        mutex_lock(&text_mutex);
-       __disarm_kprobe(kp);
+       __disarm_kprobe(kp, true);
        mutex_unlock(&text_mutex);
-       put_online_cpus();
 }
 
 /*
@@ -942,7 +1132,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
        BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 
        if (p->break_handler || p->post_handler)
-               unoptimize_kprobe(ap);  /* Fall back to normal kprobe */
+               unoptimize_kprobe(ap, true);    /* Fall back to normal kprobe */
 
        if (p->break_handler) {
                if (ap->break_handler)
@@ -993,19 +1183,21 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
  * This is the second or subsequent kprobe at the address - handle
  * the intricacies
  */
-static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
+static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
                                          struct kprobe *p)
 {
        int ret = 0;
-       struct kprobe *ap = old_p;
+       struct kprobe *ap = orig_p;
 
-       if (!kprobe_aggrprobe(old_p)) {
-               /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
-               ap = alloc_aggr_kprobe(old_p);
+       if (!kprobe_aggrprobe(orig_p)) {
+               /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
+               ap = alloc_aggr_kprobe(orig_p);
                if (!ap)
                        return -ENOMEM;
-               init_aggr_kprobe(ap, old_p);
-       }
+               init_aggr_kprobe(ap, orig_p);
+       } else if (kprobe_unused(ap))
+               /* This probe is going to die. Rescue it */
+               reuse_unused_kprobe(ap);
 
        if (kprobe_gone(ap)) {
                /*
@@ -1039,23 +1231,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
        return add_new_kprobe(ap, p);
 }
 
-/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
-static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
-{
-       struct kprobe *kp;
-
-       list_for_each_entry_rcu(kp, &p->list, list) {
-               if (!kprobe_disabled(kp))
-                       /*
-                        * There is an active probe on the list.
-                        * We can't disable aggr_kprobe.
-                        */
-                       return 0;
-       }
-       p->flags |= KPROBE_FLAG_DISABLED;
-       return 1;
-}
-
 static int __kprobes in_kprobes_functions(unsigned long addr)
 {
        struct kprobe_blackpoint *kb;
@@ -1098,34 +1273,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
 static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
 {
-       struct kprobe *old_p, *list_p;
+       struct kprobe *ap, *list_p;
 
-       old_p = get_kprobe(p->addr);
-       if (unlikely(!old_p))
+       ap = get_kprobe(p->addr);
+       if (unlikely(!ap))
                return NULL;
 
-       if (p != old_p) {
-               list_for_each_entry_rcu(list_p, &old_p->list, list)
+       if (p != ap) {
+               list_for_each_entry_rcu(list_p, &ap->list, list)
                        if (list_p == p)
                        /* kprobe p is a valid probe */
                                goto valid;
                return NULL;
        }
 valid:
-       return old_p;
+       return ap;
 }
 
 /* Return error if the kprobe is being re-registered */
 static inline int check_kprobe_rereg(struct kprobe *p)
 {
        int ret = 0;
-       struct kprobe *old_p;
 
        mutex_lock(&kprobe_mutex);
-       old_p = __get_valid_kprobe(p);
-       if (old_p)
+       if (__get_valid_kprobe(p))
                ret = -EINVAL;
        mutex_unlock(&kprobe_mutex);
+
        return ret;
 }
 
@@ -1229,67 +1403,121 @@ fail_with_jump_label:
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
+/* Check if all probes on the aggrprobe are disabled */
+static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+{
+       struct kprobe *kp;
+
+       list_for_each_entry_rcu(kp, &ap->list, list)
+               if (!kprobe_disabled(kp))
+                       /*
+                        * There is an active probe on the list.
+                        * We can't disable this ap.
+                        */
+                       return 0;
+
+       return 1;
+}
+
+/* Disable one kprobe: must be called with kprobe_mutex held */
+static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+{
+       struct kprobe *orig_p;
+
+       /* Get an original kprobe for return */
+       orig_p = __get_valid_kprobe(p);
+       if (unlikely(orig_p == NULL))
+               return NULL;
+
+       if (!kprobe_disabled(p)) {
+               /* Disable probe if it is a child probe */
+               if (p != orig_p)
+                       p->flags |= KPROBE_FLAG_DISABLED;
+
+               /* Try to disarm and disable this/parent probe */
+               if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+                       disarm_kprobe(orig_p);
+                       orig_p->flags |= KPROBE_FLAG_DISABLED;
+               }
+       }
+
+       return orig_p;
+}
+
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
 static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 {
-       struct kprobe *old_p, *list_p;
+       struct kprobe *ap, *list_p;
 
-       old_p = __get_valid_kprobe(p);
-       if (old_p == NULL)
+       /* Disable kprobe. This will disarm it if needed. */
+       ap = __disable_kprobe(p);
+       if (ap == NULL)
                return -EINVAL;
 
-       if (old_p == p ||
-           (kprobe_aggrprobe(old_p) &&
-            list_is_singular(&old_p->list))) {
+       if (ap == p)
                /*
-                * Only probe on the hash list. Disarm only if kprobes are
-                * enabled and not gone - otherwise, the breakpoint would
-                * already have been removed. We save on flushing icache.
+                * This probe is an independent (and non-optimized) kprobe
+                * (not an aggrprobe). Remove from the hash list.
                 */
-               if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
-                       disarm_kprobe(old_p);
-               hlist_del_rcu(&old_p->hlist);
-       } else {
+               goto disarmed;
+
+       /* Following process expects this probe is an aggrprobe */
+       WARN_ON(!kprobe_aggrprobe(ap));
+
+       if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
+               /*
+                * !disarmed can happen if the probe is still under delayed
+                * unoptimizing.
+                */
+               goto disarmed;
+       else {
+               /* If disabling probe has special handlers, update aggrprobe */
                if (p->break_handler && !kprobe_gone(p))
-                       old_p->break_handler = NULL;
+                       ap->break_handler = NULL;
                if (p->post_handler && !kprobe_gone(p)) {
-                       list_for_each_entry_rcu(list_p, &old_p->list, list) {
+                       list_for_each_entry_rcu(list_p, &ap->list, list) {
                                if ((list_p != p) && (list_p->post_handler))
                                        goto noclean;
                        }
-                       old_p->post_handler = NULL;
+                       ap->post_handler = NULL;
                }
 noclean:
+               /*
+                * Remove from the aggrprobe: this path will do nothing in
+                * __unregister_kprobe_bottom().
+                */
                list_del_rcu(&p->list);
-               if (!kprobe_disabled(old_p)) {
-                       try_to_disable_aggr_kprobe(old_p);
-                       if (!kprobes_all_disarmed) {
-                               if (kprobe_disabled(old_p))
-                                       disarm_kprobe(old_p);
-                               else
-                                       /* Try to optimize this probe again */
-                                       optimize_kprobe(old_p);
-                       }
-               }
+               if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
+                       /*
+                        * Try to optimize this probe again, because post
+                        * handler may have been changed.
+                        */
+                       optimize_kprobe(ap);
        }
        return 0;
+
+disarmed:
+       BUG_ON(!kprobe_disarmed(ap));
+       hlist_del_rcu(&ap->hlist);
+       return 0;
 }
 
 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 {
-       struct kprobe *old_p;
+       struct kprobe *ap;
 
        if (list_empty(&p->list))
+               /* This is an independent kprobe */
                arch_remove_kprobe(p);
        else if (list_is_singular(&p->list)) {
-               /* "p" is the last child of an aggr_kprobe */
-               old_p = list_entry(p->list.next, struct kprobe, list);
+               /* This is the last child of an aggrprobe */
+               ap = list_entry(p->list.next, struct kprobe, list);
                list_del(&p->list);
-               arch_remove_kprobe(old_p);
-               free_aggr_kprobe(old_p);
+               free_aggr_kprobe(ap);
        }
+       /* Otherwise, do nothing. */
 }
 
 int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1607,29 +1835,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 int __kprobes disable_kprobe(struct kprobe *kp)
 {
        int ret = 0;
-       struct kprobe *p;
 
        mutex_lock(&kprobe_mutex);
 
-       /* Check whether specified probe is valid. */
-       p = __get_valid_kprobe(kp);
-       if (unlikely(p == NULL)) {
+       /* Disable this kprobe */
+       if (__disable_kprobe(kp) == NULL)
                ret = -EINVAL;
-               goto out;
-       }
 
-       /* If the probe is already disabled (or gone), just return */
-       if (kprobe_disabled(kp))
-               goto out;
-
-       kp->flags |= KPROBE_FLAG_DISABLED;
-       if (p != kp)
-               /* When kp != p, p is always enabled. */
-               try_to_disable_aggr_kprobe(p);
-
-       if (!kprobes_all_disarmed && kprobe_disabled(p))
-               disarm_kprobe(p);
-out:
        mutex_unlock(&kprobe_mutex);
        return ret;
 }
@@ -1927,36 +2139,27 @@ static void __kprobes disarm_all_kprobes(void)
        mutex_lock(&kprobe_mutex);
 
        /* If kprobes are already disarmed, just return */
-       if (kprobes_all_disarmed)
-               goto already_disabled;
+       if (kprobes_all_disarmed) {
+               mutex_unlock(&kprobe_mutex);
+               return;
+       }
 
        kprobes_all_disarmed = true;
        printk(KERN_INFO "Kprobes globally disabled\n");
 
-       /*
-        * Here we call get_online_cpus() for avoiding text_mutex deadlock,
-        * because disarming may also unoptimize kprobes.
-        */
-       get_online_cpus();
        mutex_lock(&text_mutex);
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist) {
                        if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-                               __disarm_kprobe(p);
+                               __disarm_kprobe(p, false);
                }
        }
-
        mutex_unlock(&text_mutex);
-       put_online_cpus();
        mutex_unlock(&kprobe_mutex);
-       /* Allow all currently running kprobes to complete */
-       synchronize_sched();
-       return;
 
-already_disabled:
-       mutex_unlock(&kprobe_mutex);
-       return;
+       /* Wait for disarming all kprobes by optimizer */
+       wait_for_kprobe_optimizer();
 }
 
 /*
index 2870feee81dd7a046703645c9ec50022d4339f39..11847bf1e8cc254db7f2a2a255511fd36eea4a68 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/idr.h>
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
@@ -21,7 +22,9 @@
 #include <linux/dcache.h>
 #include <linux/percpu.h>
 #include <linux/ptrace.h>
+#include <linux/reboot.h>
 #include <linux/vmstat.h>
+#include <linux/device.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
@@ -133,6 +136,28 @@ static void unclone_ctx(struct perf_event_context *ctx)
        }
 }
 
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+{
+       /*
+        * only top level events have the pid namespace they were created in
+        */
+       if (event->parent)
+               event = event->parent;
+
+       return task_tgid_nr_ns(p, event->ns);
+}
+
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+       /*
+        * only top level events have the pid namespace they were created in
+        */
+       if (event->parent)
+               event = event->parent;
+
+       return task_pid_nr_ns(p, event->ns);
+}
+
 /*
  * If we inherit events we want to return the parent event id
  * to userspace.
@@ -312,9 +337,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                ctx->nr_stat++;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+       int entry = sizeof(u64); /* value */
+       int size = 0;
+       int nr = 1;
+
+       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               size += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               size += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_ID)
+               entry += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_GROUP) {
+               nr += event->group_leader->nr_siblings;
+               size += sizeof(u64);
+       }
+
+       size += entry * nr;
+       event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+       struct perf_sample_data *data;
+       u64 sample_type = event->attr.sample_type;
+       u16 size = 0;
+
+       perf_event__read_size(event);
+
+       if (sample_type & PERF_SAMPLE_IP)
+               size += sizeof(data->ip);
+
+       if (sample_type & PERF_SAMPLE_ADDR)
+               size += sizeof(data->addr);
+
+       if (sample_type & PERF_SAMPLE_PERIOD)
+               size += sizeof(data->period);
+
+       if (sample_type & PERF_SAMPLE_READ)
+               size += event->read_size;
+
+       event->header_size = size;
+}
+
+static void perf_event__id_header_size(struct perf_event *event)
+{
+       struct perf_sample_data *data;
+       u64 sample_type = event->attr.sample_type;
+       u16 size = 0;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               size += sizeof(data->tid_entry);
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               size += sizeof(data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               size += sizeof(data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               size += sizeof(data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               size += sizeof(data->cpu_entry);
+
+       event->id_header_size = size;
+}
+
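
A worked example of the cached sizes, under an assumed event configuration (the numbers follow directly from the arithmetic above; every sampled field is 8 bytes, with the tid and cpu entries packing two 32-bit values each):

/* attr.read_format  = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_ID
 * attr.sample_type  = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME
 * event not in a group (nr_siblings == 0):
 *
 *   read_size      = 8 (time_enabled) + (8 value + 8 id) * 1 = 24 bytes
 *   header_size    = 8 (ip)                                  =  8 bytes
 *   id_header_size = 8 (tid_entry) + 8 (time)                = 16 bytes */
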
 static void perf_group_attach(struct perf_event *event)
 {
-       struct perf_event *group_leader = event->group_leader;
+       struct perf_event *group_leader = event->group_leader, *pos;
 
        /*
         * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +433,11 @@ static void perf_group_attach(struct perf_event *event)
 
        list_add_tail(&event->group_entry, &group_leader->sibling_list);
        group_leader->nr_siblings++;
+
+       perf_event__header_size(group_leader);
+
+       list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+               perf_event__header_size(pos);
 }
 
 /*
@@ -391,7 +496,7 @@ static void perf_group_detach(struct perf_event *event)
        if (event->group_leader != event) {
                list_del_init(&event->group_entry);
                event->group_leader->nr_siblings--;
-               return;
+               goto out;
        }
 
        if (!list_empty(&event->group_entry))
@@ -410,6 +515,12 @@ static void perf_group_detach(struct perf_event *event)
                /* Inherit group flags from the previous leader */
                sibling->group_flags = event->group_flags;
        }
+
+out:
+       perf_event__header_size(event->group_leader);
+
+       list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+               perf_event__header_size(tmp);
 }
 
 static inline int
@@ -1073,7 +1184,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
        /*
         * not supported on inherited events
         */
-       if (event->attr.inherit)
+       if (event->attr.inherit || !is_sampling_event(event))
                return -EINVAL;
 
        atomic_add(refresh, &event->event_limit);
@@ -2289,31 +2400,6 @@ static int perf_release(struct inode *inode, struct file *file)
        return perf_event_release_kernel(event);
 }
 
-static int perf_event_read_size(struct perf_event *event)
-{
-       int entry = sizeof(u64); /* value */
-       int size = 0;
-       int nr = 1;
-
-       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-               size += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-               size += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_ID)
-               entry += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_GROUP) {
-               nr += event->group_leader->nr_siblings;
-               size += sizeof(u64);
-       }
-
-       size += entry * nr;
-
-       return size;
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
@@ -2428,7 +2514,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
        if (event->state == PERF_EVENT_STATE_ERROR)
                return 0;
 
-       if (count < perf_event_read_size(event))
+       if (count < event->read_size)
                return -ENOSPC;
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2600,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
        int ret = 0;
        u64 value;
 
-       if (!event->attr.sample_period)
+       if (!is_sampling_event(event))
                return -EINVAL;
 
        if (copy_from_user(&value, arg, sizeof(value)))
@@ -3305,6 +3391,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
        } while (len);
 }
 
+static void __perf_event_header__init_id(struct perf_event_header *header,
+                                        struct perf_sample_data *data,
+                                        struct perf_event *event)
+{
+       u64 sample_type = event->attr.sample_type;
+
+       data->type = sample_type;
+       header->size += event->id_header_size;
+
+       if (sample_type & PERF_SAMPLE_TID) {
+               /* namespace issues */
+               data->tid_entry.pid = perf_event_pid(event, current);
+               data->tid_entry.tid = perf_event_tid(event, current);
+       }
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               data->time = perf_clock();
+
+       if (sample_type & PERF_SAMPLE_ID)
+               data->id = primary_event_id(event);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               data->stream_id = event->id;
+
+       if (sample_type & PERF_SAMPLE_CPU) {
+               data->cpu_entry.cpu      = raw_smp_processor_id();
+               data->cpu_entry.reserved = 0;
+       }
+}
+
+static void perf_event_header__init_id(struct perf_event_header *header,
+                                      struct perf_sample_data *data,
+                                      struct perf_event *event)
+{
+       if (event->attr.sample_id_all)
+               __perf_event_header__init_id(header, data, event);
+}
+
+static void __perf_event__output_id_sample(struct perf_output_handle *handle,
+                                          struct perf_sample_data *data)
+{
+       u64 sample_type = data->type;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               perf_output_put(handle, data->tid_entry);
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               perf_output_put(handle, data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               perf_output_put(handle, data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               perf_output_put(handle, data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               perf_output_put(handle, data->cpu_entry);
+}
+
+static void perf_event__output_id_sample(struct perf_event *event,
+                                        struct perf_output_handle *handle,
+                                        struct perf_sample_data *sample)
+{
+       if (event->attr.sample_id_all)
+               __perf_event__output_id_sample(handle, sample);
+}
+
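The two helpers above implement perf_event_attr.sample_id_all: when that bit is set, non-sample records (MMAP, COMM, LOST, READ, throttle and so on) carry the same TID/TIME/ID/STREAM_ID/CPU trailer that samples do, so tools can correlate them without a nearby sample. A minimal userspace sketch of requesting this — field names as in include/linux/perf_event.h of this tree, error handling omitted:

----
#define _GNU_SOURCE
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Open a cycles counter whose side-band records carry identifier fields. */
static int open_counter(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size	   = sizeof(attr);
	attr.type	   = PERF_TYPE_HARDWARE;
	attr.config	   = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			     PERF_SAMPLE_TIME | PERF_SAMPLE_CPU;
	attr.sample_id_all = 1;		/* tag MMAP/COMM/LOST/... records too */
	attr.mmap = 1;
	attr.comm = 1;

	return syscall(__NR_perf_event_open, &attr, pid, -1 /* cpu */,
		       -1 /* group */, 0);
}
----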
 int perf_output_begin(struct perf_output_handle *handle,
                      struct perf_event *event, unsigned int size,
                      int nmi, int sample)
@@ -3312,6 +3465,7 @@ int perf_output_begin(struct perf_output_handle *handle,
        struct perf_buffer *buffer;
        unsigned long tail, offset, head;
        int have_lost;
+       struct perf_sample_data sample_data;
        struct {
                struct perf_event_header header;
                u64                      id;
@@ -3338,8 +3492,12 @@ int perf_output_begin(struct perf_output_handle *handle,
                goto out;
 
        have_lost = local_read(&buffer->lost);
-       if (have_lost)
-               size += sizeof(lost_event);
+       if (have_lost) {
+               lost_event.header.size = sizeof(lost_event);
+               perf_event_header__init_id(&lost_event.header, &sample_data,
+                                          event);
+               size += lost_event.header.size;
+       }
 
        perf_output_get_handle(handle);
 
@@ -3370,11 +3528,11 @@ int perf_output_begin(struct perf_output_handle *handle,
        if (have_lost) {
                lost_event.header.type = PERF_RECORD_LOST;
                lost_event.header.misc = 0;
-               lost_event.header.size = sizeof(lost_event);
                lost_event.id          = event->id;
                lost_event.lost        = local_xchg(&buffer->lost, 0);
 
                perf_output_put(handle, lost_event);
+               perf_event__output_id_sample(event, handle, &sample_data);
        }
 
        return 0;
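With sample_id_all set, even a PERF_RECORD_LOST record gains that trailer, which is why its header size is now computed via perf_event_header__init_id() before ring-buffer space is reserved. The resulting on-buffer shape, sketched from the struct used above (not an ABI header):

----
#include <linux/perf_event.h>

/* Shape of a LOST record when attr.sample_id_all is set (sketch). */
struct lost_record {
	struct perf_event_header header;  /* .size covers the trailer too */
	__u64 id;                         /* primary event id */
	__u64 lost;                       /* number of dropped records */
	/* followed, per sample_type: pid/tid, time, id, stream_id, cpu */
};
----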
@@ -3407,28 +3565,6 @@ void perf_output_end(struct perf_output_handle *handle)
        rcu_read_unlock();
 }
 
-static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
-{
-       /*
-        * only top level events have the pid namespace they were created in
-        */
-       if (event->parent)
-               event = event->parent;
-
-       return task_tgid_nr_ns(p, event->ns);
-}
-
-static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
-{
-       /*
-        * only top level events have the pid namespace they were created in
-        */
-       if (event->parent)
-               event = event->parent;
-
-       return task_pid_nr_ns(p, event->ns);
-}
-
 static void perf_output_read_one(struct perf_output_handle *handle,
                                 struct perf_event *event,
                                 u64 enabled, u64 running)
@@ -3603,61 +3739,16 @@ void perf_prepare_sample(struct perf_event_header *header,
 {
        u64 sample_type = event->attr.sample_type;
 
-       data->type = sample_type;
-
        header->type = PERF_RECORD_SAMPLE;
-       header->size = sizeof(*header);
+       header->size = sizeof(*header) + event->header_size;
 
        header->misc = 0;
        header->misc |= perf_misc_flags(regs);
 
-       if (sample_type & PERF_SAMPLE_IP) {
-               data->ip = perf_instruction_pointer(regs);
-
-               header->size += sizeof(data->ip);
-       }
-
-       if (sample_type & PERF_SAMPLE_TID) {
-               /* namespace issues */
-               data->tid_entry.pid = perf_event_pid(event, current);
-               data->tid_entry.tid = perf_event_tid(event, current);
-
-               header->size += sizeof(data->tid_entry);
-       }
-
-       if (sample_type & PERF_SAMPLE_TIME) {
-               data->time = perf_clock();
-
-               header->size += sizeof(data->time);
-       }
-
-       if (sample_type & PERF_SAMPLE_ADDR)
-               header->size += sizeof(data->addr);
-
-       if (sample_type & PERF_SAMPLE_ID) {
-               data->id = primary_event_id(event);
-
-               header->size += sizeof(data->id);
-       }
-
-       if (sample_type & PERF_SAMPLE_STREAM_ID) {
-               data->stream_id = event->id;
-
-               header->size += sizeof(data->stream_id);
-       }
-
-       if (sample_type & PERF_SAMPLE_CPU) {
-               data->cpu_entry.cpu             = raw_smp_processor_id();
-               data->cpu_entry.reserved        = 0;
-
-               header->size += sizeof(data->cpu_entry);
-       }
-
-       if (sample_type & PERF_SAMPLE_PERIOD)
-               header->size += sizeof(data->period);
+       __perf_event_header__init_id(header, data, event);
 
-       if (sample_type & PERF_SAMPLE_READ)
-               header->size += perf_event_read_size(event);
+       if (sample_type & PERF_SAMPLE_IP)
+               data->ip = perf_instruction_pointer(regs);
 
        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
                int size = 1;
@@ -3722,23 +3813,26 @@ perf_event_read_event(struct perf_event *event,
                        struct task_struct *task)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        struct perf_read_event read_event = {
                .header = {
                        .type = PERF_RECORD_READ,
                        .misc = 0,
-                       .size = sizeof(read_event) + perf_event_read_size(event),
+                       .size = sizeof(read_event) + event->read_size,
                },
                .pid = perf_event_pid(event, task),
                .tid = perf_event_tid(event, task),
        };
        int ret;
 
+       perf_event_header__init_id(&read_event.header, &sample, event);
        ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
        if (ret)
                return;
 
        perf_output_put(&handle, read_event);
        perf_output_read(&handle, event);
+       perf_event__output_id_sample(event, &handle, &sample);
 
        perf_output_end(&handle);
 }
@@ -3768,14 +3862,16 @@ static void perf_event_task_output(struct perf_event *event,
                                     struct perf_task_event *task_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        struct task_struct *task = task_event->task;
-       int size, ret;
+       int ret, size = task_event->event_id.header.size;
 
-       size  = task_event->event_id.header.size;
-       ret = perf_output_begin(&handle, event, size, 0, 0);
+       perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
+       ret = perf_output_begin(&handle, event,
+                               task_event->event_id.header.size, 0, 0);
        if (ret)
-               return;
+               goto out;
 
        task_event->event_id.pid = perf_event_pid(event, task);
        task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3785,7 +3881,11 @@ static void perf_event_task_output(struct perf_event *event,
 
        perf_output_put(&handle, task_event->event_id);
 
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       task_event->event_id.header.size = size;
 }
 
 static int perf_event_task_match(struct perf_event *event)
@@ -3900,11 +4000,16 @@ static void perf_event_comm_output(struct perf_event *event,
                                     struct perf_comm_event *comm_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int size = comm_event->event_id.header.size;
-       int ret = perf_output_begin(&handle, event, size, 0, 0);
+       int ret;
+
+       perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
+       ret = perf_output_begin(&handle, event,
+                               comm_event->event_id.header.size, 0, 0);
 
        if (ret)
-               return;
+               goto out;
 
        comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
        comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3912,7 +4017,12 @@ static void perf_event_comm_output(struct perf_event *event,
        perf_output_put(&handle, comm_event->event_id);
        perf_output_copy(&handle, comm_event->comm,
                                   comm_event->comm_size);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       comm_event->event_id.header.size = size;
 }
 
 static int perf_event_comm_match(struct perf_event *event)
@@ -3957,7 +4067,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
        comm_event->comm_size = size;
 
        comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
-
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
@@ -4038,11 +4147,15 @@ static void perf_event_mmap_output(struct perf_event *event,
                                     struct perf_mmap_event *mmap_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int size = mmap_event->event_id.header.size;
-       int ret = perf_output_begin(&handle, event, size, 0, 0);
+       int ret;
 
+       perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
+       ret = perf_output_begin(&handle, event,
+                               mmap_event->event_id.header.size, 0, 0);
        if (ret)
-               return;
+               goto out;
 
        mmap_event->event_id.pid = perf_event_pid(event, current);
        mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4050,7 +4163,12 @@ static void perf_event_mmap_output(struct perf_event *event,
        perf_output_put(&handle, mmap_event->event_id);
        perf_output_copy(&handle, mmap_event->file_name,
                                   mmap_event->file_size);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       mmap_event->event_id.header.size = size;
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
@@ -4205,6 +4323,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
 static void perf_log_throttle(struct perf_event *event, int enable)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int ret;
 
        struct {
@@ -4226,11 +4345,15 @@ static void perf_log_throttle(struct perf_event *event, int enable)
        if (enable)
                throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
 
-       ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0);
+       perf_event_header__init_id(&throttle_event.header, &sample, event);
+
+       ret = perf_output_begin(&handle, event,
+                               throttle_event.header.size, 1, 0);
        if (ret)
                return;
 
        perf_output_put(&handle, throttle_event);
+       perf_event__output_id_sample(event, &handle, &sample);
        perf_output_end(&handle);
 }
 
@@ -4246,6 +4369,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
        struct hw_perf_event *hwc = &event->hw;
        int ret = 0;
 
+       /*
+        * Non-sampling counters might still use the PMI to fold short
+        * hardware counters, ignore those.
+        */
+       if (unlikely(!is_sampling_event(event)))
+               return 0;
+
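The new early return relies on is_sampling_event(), added elsewhere in this series to include/linux/perf_event.h; it boils down to a non-zero sample_period check, roughly:

----
/* Rough shape of the helper used throughout these hunks. */
static inline bool is_sampling_event(struct perf_event *event)
{
	return event->attr.sample_period != 0;
}
----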
        if (!throttle) {
                hwc->interrupts++;
        } else {
@@ -4391,7 +4521,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
        if (!regs)
                return;
 
-       if (!hwc->sample_period)
+       if (!is_sampling_event(event))
                return;
 
        if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4554,7 +4684,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
        struct hw_perf_event *hwc = &event->hw;
        struct hlist_head *head;
 
-       if (hwc->sample_period) {
+       if (is_sampling_event(event)) {
                hwc->last_period = hwc->sample_period;
                perf_swevent_set_period(event);
        }
@@ -4811,15 +4941,6 @@ static int perf_tp_event_init(struct perf_event *event)
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -ENOENT;
 
-       /*
-        * Raw tracepoint data is a severe data leak, only allow root to
-        * have these.
-        */
-       if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
-                       perf_paranoid_tracepoint_raw() &&
-                       !capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
        err = perf_trace_init(event);
        if (err)
                return err;
@@ -4842,7 +4963,7 @@ static struct pmu perf_tracepoint = {
 
 static inline void perf_tp_register(void)
 {
-       perf_pmu_register(&perf_tracepoint);
+       perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
 }
 
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4932,31 +5053,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 static void perf_swevent_start_hrtimer(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
+       s64 period;
+
+       if (!is_sampling_event(event))
+               return;
 
        hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hwc->hrtimer.function = perf_swevent_hrtimer;
-       if (hwc->sample_period) {
-               s64 period = local64_read(&hwc->period_left);
 
-               if (period) {
-                       if (period < 0)
-                               period = 10000;
+       period = local64_read(&hwc->period_left);
+       if (period) {
+               if (period < 0)
+                       period = 10000;
 
-                       local64_set(&hwc->period_left, 0);
-               } else {
-                       period = max_t(u64, 10000, hwc->sample_period);
-               }
-               __hrtimer_start_range_ns(&hwc->hrtimer,
+               local64_set(&hwc->period_left, 0);
+       } else {
+               period = max_t(u64, 10000, hwc->sample_period);
+       }
+       __hrtimer_start_range_ns(&hwc->hrtimer,
                                ns_to_ktime(period), 0,
                                HRTIMER_MODE_REL_PINNED, 0);
-       }
 }
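Behaviourally this hunk only hoists the old hwc->sample_period test to the top: non-sampling software events now skip the hrtimer setup entirely, and the period selection itself is unchanged. In isolation that selection amounts to (sketch, not the kernel code):

----
#include <stdint.h>

/* Resume a positive leftover period, treat an overrun (negative leftover)
 * as "fire again in 10us", and start a fresh timer at max(10us,
 * sample_period). Values are nanoseconds. */
static int64_t swevent_hrtimer_period(int64_t period_left, uint64_t sample_period)
{
	if (period_left > 0)
		return period_left;
	if (period_left < 0)
		return 10000;
	return sample_period > 10000 ? (int64_t)sample_period : 10000;
}
----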
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
 
-       if (hwc->sample_period) {
+       if (is_sampling_event(event)) {
                ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
                local64_set(&hwc->period_left, ktime_to_ns(remaining));
 
@@ -5184,8 +5307,61 @@ static void free_pmu_context(struct pmu *pmu)
 out:
        mutex_unlock(&pmus_lock);
 }
+static struct idr pmu_idr;
+
+static ssize_t
+type_show(struct device *dev, struct device_attribute *attr, char *page)
+{
+       struct pmu *pmu = dev_get_drvdata(dev);
+
+       return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
+}
+
+static struct device_attribute pmu_dev_attrs[] = {
+       __ATTR_RO(type),
+       __ATTR_NULL,
+};
+
+static int pmu_bus_running;
+static struct bus_type pmu_bus = {
+       .name           = "event_source",
+       .dev_attrs      = pmu_dev_attrs,
+};
+
+static void pmu_dev_release(struct device *dev)
+{
+       kfree(dev);
+}
+
+static int pmu_dev_alloc(struct pmu *pmu)
+{
+       int ret = -ENOMEM;
+
+       pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+       if (!pmu->dev)
+               goto out;
+
+       device_initialize(pmu->dev);
+       ret = dev_set_name(pmu->dev, "%s", pmu->name);
+       if (ret)
+               goto free_dev;
+
+       dev_set_drvdata(pmu->dev, pmu);
+       pmu->dev->bus = &pmu_bus;
+       pmu->dev->release = pmu_dev_release;
+       ret = device_add(pmu->dev);
+       if (ret)
+               goto free_dev;
+
+out:
+       return ret;
+
+free_dev:
+       put_device(pmu->dev);
+       goto out;
+}
 
-int perf_pmu_register(struct pmu *pmu)
+int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
        int cpu, ret;
 
@@ -5195,13 +5371,38 @@ int perf_pmu_register(struct pmu *pmu)
        if (!pmu->pmu_disable_count)
                goto unlock;
 
+       pmu->type = -1;
+       if (!name)
+               goto skip_type;
+       pmu->name = name;
+
+       if (type < 0) {
+               int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
+               if (!err)
+                       goto free_pdc;
+
+               err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
+               if (err) {
+                       ret = err;
+                       goto free_pdc;
+               }
+       }
+       pmu->type = type;
+
+       if (pmu_bus_running) {
+               ret = pmu_dev_alloc(pmu);
+               if (ret)
+                       goto free_idr;
+       }
+
+skip_type:
        pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
        if (pmu->pmu_cpu_context)
                goto got_cpu_context;
 
        pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
        if (!pmu->pmu_cpu_context)
-               goto free_pdc;
+               goto free_dev;
 
        for_each_possible_cpu(cpu) {
                struct perf_cpu_context *cpuctx;
@@ -5245,6 +5446,14 @@ unlock:
 
        return ret;
 
+free_dev:
+       device_del(pmu->dev);
+       put_device(pmu->dev);
+
+free_idr:
+       if (pmu->type >= PERF_TYPE_MAX)
+               idr_remove(&pmu_idr, pmu->type);
+
 free_pdc:
        free_percpu(pmu->pmu_disable_count);
        goto unlock;
@@ -5264,6 +5473,10 @@ void perf_pmu_unregister(struct pmu *pmu)
        synchronize_rcu();
 
        free_percpu(pmu->pmu_disable_count);
+       if (pmu->type >= PERF_TYPE_MAX)
+               idr_remove(&pmu_idr, pmu->type);
+       device_del(pmu->dev);
+       put_device(pmu->dev);
        free_pmu_context(pmu);
 }
 
@@ -5273,6 +5486,13 @@ struct pmu *perf_init_event(struct perf_event *event)
        int idx;
 
        idx = srcu_read_lock(&pmus_srcu);
+
+       rcu_read_lock();
+       pmu = idr_find(&pmu_idr, event->attr.type);
+       rcu_read_unlock();
+       if (pmu)
+               goto unlock;
+
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                int ret = pmu->event_init(event);
                if (!ret)
@@ -5737,6 +5957,12 @@ SYSCALL_DEFINE5(perf_event_open,
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
 
+       /*
+        * Precalculate sample_data sizes
+        */
+       perf_event__header_size(event);
+       perf_event__id_header_size(event);
+
        /*
         * Drop the reference on the group_event after placing the
         * new event on the sibling_list. This ensures destruction
@@ -6089,6 +6315,12 @@ inherit_event(struct perf_event *parent_event,
        child_event->ctx = child_ctx;
        child_event->overflow_handler = parent_event->overflow_handler;
 
+       /*
+        * Precalculate sample_data sizes
+        */
+       perf_event__header_size(child_event);
+       perf_event__id_header_size(child_event);
+
        /*
         * Link it up in the child's context:
         */
@@ -6320,7 +6552,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
        mutex_unlock(&swhash->hlist_mutex);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
 static void perf_pmu_rotate_stop(struct pmu *pmu)
 {
        struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -6374,6 +6606,26 @@ static void perf_event_exit_cpu(int cpu)
 static inline void perf_event_exit_cpu(int cpu) { }
 #endif
 
+static int
+perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu)
+               perf_event_exit_cpu(cpu);
+
+       return NOTIFY_OK;
+}
+
+/*
+ * Run the perf reboot notifier at the very last possible moment so that
+ * the generic watchdog code runs as long as possible.
+ */
+static struct notifier_block perf_reboot_notifier = {
+       .notifier_call = perf_reboot,
+       .priority = INT_MIN,
+};
+
 static int __cpuinit
 perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
@@ -6402,14 +6654,45 @@ void __init perf_event_init(void)
 {
        int ret;
 
+       idr_init(&pmu_idr);
+
        perf_event_init_all_cpus();
        init_srcu_struct(&pmus_srcu);
-       perf_pmu_register(&perf_swevent);
-       perf_pmu_register(&perf_cpu_clock);
-       perf_pmu_register(&perf_task_clock);
+       perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
+       perf_pmu_register(&perf_cpu_clock, NULL, -1);
+       perf_pmu_register(&perf_task_clock, NULL, -1);
        perf_tp_register();
        perf_cpu_notifier(perf_cpu_notify);
+       register_reboot_notifier(&perf_reboot_notifier);
 
        ret = init_hw_breakpoint();
        WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
+
+static int __init perf_event_sysfs_init(void)
+{
+       struct pmu *pmu;
+       int ret;
+
+       mutex_lock(&pmus_lock);
+
+       ret = bus_register(&pmu_bus);
+       if (ret)
+               goto unlock;
+
+       list_for_each_entry(pmu, &pmus, entry) {
+               if (!pmu->name || pmu->type < 0)
+                       continue;
+
+               ret = pmu_dev_alloc(pmu);
+               WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
+       }
+       pmu_bus_running = 1;
+       ret = 0;
+
+unlock:
+       mutex_unlock(&pmus_lock);
+
+       return ret;
+}
+device_initcall(perf_event_sysfs_init);
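Taken together, pmu_dev_alloc(), the new perf_pmu_register() signature and perf_event_sysfs_init() export every named PMU under a new "event_source" bus, with its statically or idr-assigned type number readable from sysfs. A hedged sketch of how a tool could pick that value up and feed it into perf_event_attr.type — the path follows from the bus and attribute names registered above:

----
#include <stdio.h>

/* Read /sys/bus/event_source/devices/<pmu>/type, e.g. pmu = "software"
 * or "tracepoint"; returns the value to place in perf_event_attr.type,
 * or -1 on error. */
static int read_pmu_type(const char *pmu)
{
	char path[256];
	FILE *f;
	int type = -1;

	snprintf(path, sizeof(path),
		 "/sys/bus/event_source/devices/%s/type", pmu);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%d", &type) != 1)
		type = -1;
	fclose(f);
	return type;
}
----

Dynamically registered PMUs receive a type above PERF_TYPE_MAX from the idr, which is exactly the value perf_init_event() now looks up first before falling back to walking the pmus list.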
index 297d1a0eedb0e68d8b9327f530ba477c93b1222e..c68cead94dd76942beeffa932498d0ae5a2cfe41 100644 (file)
@@ -8293,8 +8293,6 @@ void __init sched_init(void)
                zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-       perf_event_init();
-
        scheduler_running = 1;
 }
 
index 5abfa151855493735a91fd45a255a45727c8ba97..46404414d8a7d4b187903906927157e1f092020a 100644 (file)
@@ -745,21 +745,21 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
-#endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
        {
-               .procname       = "unknown_nmi_panic",
-               .data           = &unknown_nmi_panic,
+               .procname       = "nmi_watchdog",
+               .data           = &watchdog_enabled,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dowatchdog_enabled,
        },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
        {
-               .procname       = "nmi_watchdog",
-               .data           = &nmi_watchdog_enabled,
+               .procname       = "unknown_nmi_panic",
+               .data           = &unknown_nmi_panic,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_nmi_enabled,
+               .proc_handler   = proc_dointvec,
        },
 #endif
 #if defined(CONFIG_X86)
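The effect of this sysctl shuffle is that kernel.nmi_watchdog is now backed by the generic, perf-based lockup detector through proc_dowatchdog_enabled rather than the old x86-only nmi_watchdog_enabled knob, while unknown_nmi_panic stays under the x86 #ifdef. Toggling it still goes through the same proc path; a trivial sketch:

----
#include <stdio.h>

/* Enable/disable the NMI watchdog through the relocated sysctl. */
static int set_nmi_watchdog(int on)
{
	FILE *f = fopen("/proc/sys/kernel/nmi_watchdog", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", on ? 1 : 0);
	return fclose(f);
}
----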
index 1357c5786064e6c8f030defbbb7f76f690dc3c15..4b2545a136ffcec72d5073b579cf3f41c49fcbd3 100644 (file)
@@ -136,7 +136,6 @@ static const struct bin_table bin_kern_table[] = {
        { CTL_INT,      KERN_IA64_UNALIGNED,            "ignore-unaligned-usertrap" },
        { CTL_INT,      KERN_COMPAT_LOG,                "compat-log" },
        { CTL_INT,      KERN_MAX_LOCK_DEPTH,            "max_lock_depth" },
-       { CTL_INT,      KERN_NMI_WATCHDOG,              "nmi_watchdog" },
        { CTL_INT,      KERN_PANIC_ON_NMI,              "panic_on_unrecovered_nmi" },
        {}
 };
index 39c059ca670e64156e6681782ffa708c6b8d720f..19a359d5e6d58573cc1c74326e488a419b01b342 100644 (file)
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
 /* Count the events in use (per event id, not per instance) */
 static int     total_ref_count;
 
+static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
+                                struct perf_event *p_event)
+{
+       /* No tracing, just counting, so no obvious leak */
+       if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
+               return 0;
+
+       /* Some events are ok to be traced by non-root users... */
+       if (p_event->attach_state == PERF_ATTACH_TASK) {
+               if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
+                       return 0;
+       }
+
+       /*
+        * ...otherwise raw tracepoint data can be a severe data leak,
+        * only allow root to have these.
+        */
+       if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       return 0;
+}
+
 static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
 {
        struct hlist_head __percpu *list;
-       int ret = -ENOMEM;
+       int ret;
        int cpu;
 
+       ret = perf_trace_event_perm(tp_event, p_event);
+       if (ret)
+               return ret;
+
        p_event->tp_event = tp_event;
        if (tp_event->perf_refcount++ > 0)
                return 0;
 
+       ret = -ENOMEM;
+
        list = alloc_percpu(struct hlist_head);
        if (!list)
                goto fail;
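perf_trace_event_perm() replaces the blanket root-only check that the kernel/perf_event.c hunk above removed from perf_tp_event_init(): raw (PERF_SAMPLE_RAW) tracepoint data remains root-only by default, counting-only use is always allowed, and per-task events on tracepoints flagged TRACE_EVENT_FL_CAP_ANY are opened up to unprivileged users. A hedged sketch of the per-task case — the tracepoint id comes from debugfs, and which events actually carry the CAP_ANY flag depends on the rest of this series:

----
#define _GNU_SOURCE
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Attach a tracepoint event to the current task (pid=0, cpu=-1). 'id' is
 * the value of .../tracing/events/<sys>/<event>/id for a CAP_ANY event. */
static int open_task_tracepoint(uint64_t id)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size	   = sizeof(attr);
	attr.type	   = PERF_TYPE_TRACEPOINT;
	attr.config	   = id;
	attr.sample_period = 1;
	attr.sample_type   = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME;

	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}
----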
index 0725eeab1937ef24a301f2c0b0404f64ce95e026..35fde09b81dee7f386c111766b1fdbefac2d7414 100644 (file)
 
 DEFINE_MUTEX(event_mutex);
 
+DEFINE_MUTEX(event_storage_mutex);
+EXPORT_SYMBOL_GPL(event_storage_mutex);
+
+char event_storage[EVENT_STORAGE_SIZE];
+EXPORT_SYMBOL_GPL(event_storage);
+
 LIST_HEAD(ftrace_events);
 LIST_HEAD(ftrace_common_fields);
 
index 4ba44deaac259d05fb67d5d31e8ce5371844a414..4b74d71705c0d2be2a9adf67246823584bd34fcd 100644 (file)
@@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void)   \
 
 #undef __array
 #define __array(type, item, len)                                       \
-       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
-       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+       do {                                                            \
+               BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                 \
+               mutex_lock(&event_storage_mutex);                       \
+               snprintf(event_storage, sizeof(event_storage),          \
+                        "%s[%d]", #type, len);                         \
+               ret = trace_define_field(event_call, event_storage, #item, \
                                 offsetof(typeof(field), item),         \
                                 sizeof(field.item),                    \
                                 is_signed_type(type), FILTER_OTHER);   \
-       if (ret)                                                        \
-               return ret;
+               mutex_unlock(&event_storage_mutex);                     \
+               if (ret)                                                \
+                       return ret;                                     \
+       } while (0);
 
 #undef __array_desc
 #define __array_desc(type, container, item, len)                       \
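The rewritten __array() no longer pastes #type "[" #len "]" together as string literals; it formats the same text at runtime into the shared event_storage buffer exported from kernel/trace/trace_events.c above, under event_storage_mutex, before handing it to trace_define_field(). Conceptually, for something like __array(char, comm, 16), the string it builds is just:

----
#include <stdio.h>

/* What the macro body builds (illustrative): a "type[len]" string in a
 * shared buffer, then passed on as the field's type name. */
int main(void)
{
	static char event_storage[128];

	snprintf(event_storage, sizeof(event_storage), "%s[%d]", "char", 16);
	printf("%s\n", event_storage);	/* prints: char[16] */
	return 0;
}
----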
index 6e3c41a4024c1cc66be01218e2c37498498f2469..eb17e143b5dad8b3815e66d661ff89aefea86196 100644 (file)
@@ -57,6 +57,8 @@ static int __init hardlockup_panic_setup(char *str)
 {
        if (!strncmp(str, "panic", 5))
                hardlockup_panic = 1;
+       else if (!strncmp(str, "0", 1))
+               no_watchdog = 1;
        return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -547,13 +549,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
 };
 
-static int __init spawn_watchdog_task(void)
+void __init lockup_detector_init(void)
 {
        void *cpu = (void *)(long)smp_processor_id();
        int err;
 
        if (no_watchdog)
-               return 0;
+               return;
 
        err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        WARN_ON(notifier_to_errno(err));
@@ -561,6 +563,5 @@ static int __init spawn_watchdog_task(void)
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
 
-       return 0;
+       return;
 }
-early_initcall(spawn_watchdog_task);
index 28b42b9274d0b5fe47522d9df8158498be12319a..2d05adb984018776610f573de126f14bda4c9d2e 100644 (file)
@@ -173,7 +173,8 @@ config LOCKUP_DETECTOR
          An NMI is generated every 60 seconds or so to check for hardlockups.
 
 config HARDLOCKUP_DETECTOR
-       def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+       def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
+                !ARCH_HAS_NMI_WATCHDOG
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
        bool "Panic (Reboot) On Soft Lockups"
index 5ad25e17b6cb2782a2101b59ad7cfd442a8af2ea..4eb99ab34053769f5b2b644594427b2bdc108c82 100644 (file)
@@ -214,17 +214,22 @@ ifdef BUILD_C_RECORDMCOUNT
 # The empty.o file is created in the make process in order to determine
 #  the target endianness and word size. It is made before all other C
 #  files, including recordmcount.
-cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then                 \
-                       $(objtree)/scripts/recordmcount "$(@)";                 \
-                   fi;
+sub_cmd_record_mcount =                                        \
+       if [ $(@) != "scripts/mod/empty.o" ]; then      \
+               $(objtree)/scripts/recordmcount "$(@)"; \
+       fi;
 else
-cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
        "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
        "$(if $(CONFIG_64BIT),64,32)" \
        "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \
        "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
        "$(if $(part-of-module),1,0)" "$(@)";
 endif
+cmd_record_mcount =                                            \
+       if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then    \
+               $(sub_cmd_record_mcount)                        \
+       fi;
 endif
 
 define rule_cc_o_c
index b2c63309a65165b471822e99268c828bbdb07777..6f5a498608b292241e93dc9c498e8ce5f6a683cc 100644 (file)
@@ -24,12 +24,47 @@ OPTIONS
 --input=::
         Input file name. (default: perf.data)
 
+-d::
+--dsos=<dso[,dso...]>::
+        Only consider symbols in these dsos.
+-s::
+--symbol=<symbol>::
+        Symbol to annotate.
+
+-f::
+--force::
+        Don't complain, do it.
+
+-v::
+--verbose::
+        Be more verbose. (Show symbol address, etc)
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-k::
+--vmlinux=<file>::
+        vmlinux pathname.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
+-l::
+--print-line::
+        Print matching source lines (may be slow).
+
+-P::
+--full-paths::
+        Don't shorten the displayed pathnames.
+
 --stdio:: Use the stdio interface.
 
 --tui:: Use the TUI interface Use of --tui requires a tty, if one is not
        present, as when piping to other commands, the stdio interface is
        used. This interfaces starts by centering on the line with more
-       samples, TAB/UNTAB cycles thru the lines with more samples.
+       samples, TAB/UNTAB cycles through the lines with more samples.
 
 SEE ALSO
 --------
index 01b642c0bf8f974aedf2560df4e91cc5dd514a37..5eaac6f26d51e861236cfa48ab7c0509107b472c 100644 (file)
@@ -18,6 +18,9 @@ perf report.
 
 OPTIONS
 -------
+-H::
+--with-hits::
+        Show only DSOs with hits.
 -i::
 --input=::
         Input file name. (default: perf.data)
index 20d97d84ea1c37164005a4f38b8af5e23104ef93..74d7481ed7a6916f8797ed67b91b5c5d80382edc 100644 (file)
@@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data.
 
 OPTIONS
 -------
+-M::
+--displacement::
+        Show position displacement relative to baseline.
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel
+
 -d::
 --dsos=::
        Only consider symbols in these dsos. CSV that understands
@@ -42,7 +54,7 @@ OPTIONS
 --field-separator=::
 
        Use a special separator character and don't pad with spaces, replacing
-       all occurances of this separator in symbol names (and other output)
+       all occurrences of this separator in symbol names (and other output)
        with a '.' character, that thus it's the only non valid separator.
 
 -v::
@@ -50,6 +62,13 @@ OPTIONS
        Be verbose, for instance, show the raw counts in addition to the
        diff.
 
+-f::
+--force::
+       Don't complain, do it.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
index d004e19fe6d6ffcc9c9d8e25822d251d8dda9c6e..dd84cb2f0a8861dd8656b4058ae8ed906f17169c 100644 (file)
@@ -22,7 +22,7 @@ There are a couple of variants of perf kvm:
   a performance counter profile of guest os in realtime
   of an arbitrary workload.
 
-  'perf kvm record <command>' to record the performance couinter profile
+  'perf kvm record <command>' to record the performance counter profile
   of an arbitrary workload and save it into a perf data file. If both
   --host and --guest are input, the perf data file name is perf.data.kvm.
   If there is  no --host but --guest, the file name is perf.data.guest.
@@ -40,6 +40,12 @@ There are a couple of variants of perf kvm:
 
 OPTIONS
 -------
+-i::
+--input=::
+        Input file name.
+-o::
+--output::
+        Output file name.
 --host=::
         Collect host side performance profile.
 --guest=::
index b317102138c82f2a78a58ada898888603f8cc802..921de259ea1086f36ce5b7e6a3426cec5618deba 100644 (file)
@@ -24,6 +24,21 @@ and statistics with this 'perf lock' command.
 
   'perf lock report' reports statistical data.
 
+OPTIONS
+-------
+
+-i::
+--input=<file>::
+        Input file name.
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 SEE ALSO
 --------
 linkperf:perf[1]
index 62de1b7f4e760367337042c52760e7a49ded50c7..86b797a35aa6acae540b652345937991257daa4d 100644 (file)
@@ -115,9 +115,9 @@ Each probe argument follows below syntax.
 
 LINE SYNTAX
 -----------
-Line range is descripted by following syntax.
+Line range is described by following syntax.
 
- "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
+ "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
 
 FUNC specifies the function name of showing lines. 'RLN' is the start line
 number from function entry line, and 'RLN2' is the end line number. As same as
index a91f9f9e6e5c27f96623fd10061f12a3041926b6..52462ae26455c264aa83130c7bc50c9ca97807cc 100644 (file)
@@ -39,15 +39,24 @@ OPTIONS
           be passed as follows: '\mem:addr[:[r][w][x]]'.
           If you want to profile read-write accesses in 0x1000, just set
           'mem:0x1000:rw'.
+
+--filter=<filter>::
+        Event filter.
+
 -a::
-        System-wide collection.
+--all-cpus::
+        System-wide collection from all CPUs.
 
 -l::
         Scale counter values.
 
 -p::
 --pid=::
-       Record events on existing pid.
+       Record events on existing process ID.
+
+-t::
+--tid=::
+        Record events on existing thread ID.
 
 -r::
 --realtime=::
@@ -99,6 +108,11 @@ OPTIONS
 --data::
        Sample addresses.
 
+-T::
+--timestamp::
+       Sample timestamps. Use it with 'perf report -D' to see the timestamps,
+       for instance.
+
 -n::
 --no-samples::
        Don't sample.
@@ -109,8 +123,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun
 
 -C::
 --cpu::
-Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode with inheritance mode on (default), samples are captured only when
 the thread executes on the designated CPUs. Default is to monitor all CPUs.
 
index 12052c9ed0babfc3a1c93cc01758ec3b7747ee10..8ba03d6e5398d8387b11f9caf183bed81a0eb5a2 100644 (file)
@@ -20,6 +20,11 @@ OPTIONS
 -i::
 --input=::
         Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 -d::
 --dsos=::
        Only consider symbols in these dsos. CSV that understands
@@ -27,6 +32,10 @@ OPTIONS
 -n::
 --show-nr-samples::
        Show the number of samples for each symbol
+
+--showcpuutilization::
+        Show sample percentage for different cpu modes.
+
 -T::
 --threads::
        Show per-thread event counters
@@ -39,12 +48,24 @@ OPTIONS
        Only consider these symbols. CSV that understands
        file://filename entries.
 
+-U::
+--hide-unresolved::
+        Only display entries resolved to a symbol.
+
 -s::
 --sort=::
        Sort by key(s): pid, comm, dso, symbol, parent.
 
+-p::
+--parent=<regex>::
+        regex filter to identify parent, see: '--sort parent'
+
+-x::
+--exclude-other::
+        Only display entries with parent-match.
+
 -w::
---field-width=::
+--column-widths=<width[,width...]>::
        Force each column width to the provided list, for large terminal
        readability.
 
@@ -52,19 +73,26 @@ OPTIONS
 --field-separator=::
 
        Use a special separator character and don't pad with spaces, replacing
-       all occurances of this separator in symbol names (and other output)
+       all occurrences of this separator in symbol names (and other output)
        with a '.' character, that thus it's the only non valid separator.
 
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 -g [type,min]::
 --call-graph::
-        Display callchains using type and min percent threshold.
+        Display call chains using type and min percent threshold.
        type can be either:
-       - flat: single column, linear exposure of callchains.
+       - flat: single column, linear exposure of call chains.
        - graph: use a graph tree, displaying absolute overhead rates.
        - fractal: like graph, but displays relative rates. Each branch of
                 the tree is considered as a new profiled object. +
        Default: fractal,0.5.
 
+--pretty=<key>::
+        Pretty printing style.  key: normal, raw
+
 --stdio:: Use the stdio interface.
 
 --tui:: Use the TUI interface, that is integrated with annotate and allows
@@ -72,6 +100,25 @@ OPTIONS
        requires a tty, if one is not present, as when piping to other
        commands, the stdio interface is used.
 
+-k::
+--vmlinux=<file>::
+        vmlinux pathname
+
+--kallsyms=<file>::
+        kallsyms pathname
+
+-m::
+--modules::
+        Load module symbols. WARNING: This should only be used with -k and
+        a LIVE kernel.
+
+-f::
+--force::
+        Don't complain, do it.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
index 8417644a6166b9fcd071b88eebb9dd46f00b8a1f..46822d5fde1c0328ca8af0f7687265adeeca7f0e 100644 (file)
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
 SYNOPSIS
 --------
 [verse]
-'perf sched' {record|latency|replay|trace}
+'perf sched' {record|latency|map|replay|trace}
 
 DESCRIPTION
 -----------
-There are four variants of perf sched:
+There are five variants of perf sched:
 
   'perf sched record <command>' to record the scheduling events
   of an arbitrary workload.
@@ -30,8 +30,22 @@ There are four variants of perf sched:
   of the workload as it occurred when it was recorded - and can repeat
   it a number of times, measuring its performance.)
 
+  'perf sched map' to print a textual context-switching outline of
+  workload captured via perf sched record.  Columns stand for
+  individual CPUs, and the two-letter shortcuts stand for tasks that
+  are running on a CPU. A '*' denotes the CPU that had the event, and
+  a dot signals an idle CPU.
+
 OPTIONS
 -------
+-i::
+--input=<file>::
+        Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 -D::
 --dump-raw-trace=::
         Display verbose dump of the sched data.
similarity index 90%
rename from tools/perf/Documentation/perf-trace-perl.txt
rename to tools/perf/Documentation/perf-script-perl.txt
index ee6525ee6d69ada5c5cae3eb928b6626d9cfae49..5bb41e55a3ac3cbf98d728ec4de3df655d77ff61 100644 (file)
@@ -1,19 +1,19 @@
-perf-trace-perl(1)
+perf-script-perl(1)
 ==================
 
 NAME
 ----
-perf-trace-perl - Process trace data with a Perl script
+perf-script-perl - Process trace data with a Perl script
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [-s [Perl]:script[.pl] ]
+'perf script' [-s [Perl]:script[.pl] ]
 
 DESCRIPTION
 -----------
 
-This perf trace option is used to process perf trace data using perf's
+This perf script option is used to process perf script data using perf's
 built-in Perl interpreter.  It reads and processes the input file and
 displays the results of the trace analysis implemented in the given
 Perl script, if any.
@@ -21,7 +21,7 @@ Perl script, if any.
 STARTER SCRIPTS
 ---------------
 
-You can avoid reading the rest of this document by running 'perf trace
+You can avoid reading the rest of this document by running 'perf script
 -g perl' in the same directory as an existing perf.data trace file.
 That will generate a starter script containing a handler for each of
 the event types in the trace file; it simply prints every available
@@ -30,13 +30,13 @@ field for each event in the trace file.
 You can also look at the existing scripts in
 ~/libexec/perf-core/scripts/perl for typical examples showing how to
 do basic things like aggregate event data, print results, etc.  Also,
-the check-perf-trace.pl script, while not interesting for its results,
+the check-perf-script.pl script, while not interesting for its results,
 attempts to exercise all of the main scripting features.
 
 EVENT HANDLERS
 --------------
 
-When perf trace is invoked using a trace script, a user-defined
+When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
 ignored (or passed to a 'trace_handled' function, see below) and the
@@ -112,13 +112,13 @@ write a useful trace script.  The sections below cover the rest.
 SCRIPT LAYOUT
 -------------
 
-Every perf trace Perl script should start by setting up a Perl module
+Every perf script Perl script should start by setting up a Perl module
 search path and 'use'ing a few support modules (see module
 descriptions below):
 
 ----
- use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
- use lib "./Perf-Trace-Util/lib";
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib";
+ use lib "./perf-script-Util/lib";
  use Perf::Trace::Core;
  use Perf::Trace::Context;
  use Perf::Trace::Util;
@@ -162,7 +162,7 @@ sub trace_unhandled
 ----
 
 The remaining sections provide descriptions of each of the available
-built-in perf trace Perl modules and their associated functions.
+built-in perf script Perl modules and their associated functions.
 
 AVAILABLE MODULES AND FUNCTIONS
 -------------------------------
@@ -170,7 +170,7 @@ AVAILABLE MODULES AND FUNCTIONS
 The following sections describe the functions and variables available
 via the various Perf::Trace::* Perl modules.  To use the functions and
 variables from the given module, add the corresponding 'use
-Perf::Trace::XXX' line to your perf trace script.
+Perf::Trace::XXX' line to your perf script script.
 
 Perf::Trace::Core Module
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -204,7 +204,7 @@ argument.
 Perf::Trace::Util Module
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-Various utility functions for use with perf trace:
+Various utility functions for use with perf script:
 
   nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
   nsecs_secs($nsecs) - returns whole secs portion given nsecs
@@ -214,4 +214,4 @@ Various utility functions for use with perf trace:
 
 SEE ALSO
 --------
-linkperf:perf-trace[1]
+linkperf:perf-script[1]
similarity index 89%
rename from tools/perf/Documentation/perf-trace-python.txt
rename to tools/perf/Documentation/perf-script-python.txt
index 693be804dd3d88a167b8d3cdaf3023f5fa2cd5c2..36b38277422c8a9973c3a3fec671271ffeb23ffa 100644 (file)
@@ -1,19 +1,19 @@
-perf-trace-python(1)
+perf-script-python(1)
 ====================
 
 NAME
 ----
-perf-trace-python - Process trace data with a Python script
+perf-script-python - Process trace data with a Python script
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [-s [Python]:script[.py] ]
+'perf script' [-s [Python]:script[.py] ]
 
 DESCRIPTION
 -----------
 
-This perf trace option is used to process perf trace data using perf's
+This perf script option is used to process perf script data using perf's
 built-in Python interpreter.  It reads and processes the input file and
 displays the results of the trace analysis implemented in the given
 Python script, if any.
@@ -23,15 +23,15 @@ A QUICK EXAMPLE
 
 This section shows the process, start to finish, of creating a working
 Python script that aggregates and extracts useful information from a
-raw perf trace stream.  You can avoid reading the rest of this
+raw perf script stream.  You can avoid reading the rest of this
 document if an example is enough for you; the rest of the document
 provides more details on each step and lists the library functions
 available to script writers.
 
 This example actually details the steps that were used to create the
-'syscall-counts' script you see when you list the available perf trace
-scripts via 'perf trace -l'.  As such, this script also shows how to
-integrate your script into the list of general-purpose 'perf trace'
+'syscall-counts' script you see when you list the available perf script
+scripts via 'perf script -l'.  As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf script'
 scripts listed by that command.
 
 The syscall-counts script is a simple script, but demonstrates all the
@@ -105,31 +105,31 @@ That single stream will be recorded in a file in the current directory
 called perf.data.
 
 Once we have a perf.data file containing our data, we can use the -g
-'perf trace' option to generate a Python script that will contain a
+'perf script' option to generate a Python script that will contain a
 callback handler for each event type found in the perf.data trace
 stream (for more details, see the STARTER SCRIPTS section).
 
 ----
-# perf trace -g python
-generated Python script: perf-trace.py
+# perf script -g python
+generated Python script: perf-script.py
 
 The output file created also in the current directory is named
-perf-trace.py.  Here's the file in its entirety:
+perf-script.py.  Here's the file in its entirety:
 
-# perf trace event handlers, generated by perf trace -g python
+# perf script event handlers, generated by perf script -g python
 # Licensed under the terms of the GNU GPL License version 2
 
 # The common_* event handler fields are the most useful fields common to
 # all events.  They don't necessarily correspond to the 'common_*' fields
 # in the format files.  Those fields not available as handler params can
 # be retrieved using Python functions of the form common_*(context).
-# See the perf-trace-python Documentation for the list of available functions.
+# See the perf-script-python Documentation for the list of available functions.
 
 import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -160,7 +160,7 @@ def print_header(event_name, cpu, secs, nsecs, pid, comm):
 ----
 
 At the top is a comment block followed by some import statements and a
-path append which every perf trace script should include.
+path append which every perf script script should include.
 
 Following that are a couple generated functions, trace_begin() and
 trace_end(), which are called at the beginning and the end of the
@@ -189,8 +189,8 @@ simply a utility function used for that purpose.  Let's rename the
 script and run it to see the default output:
 
 ----
-# mv perf-trace.py syscall-counts.py
-# perf trace -s syscall-counts.py
+# mv perf-script.py syscall-counts.py
+# perf script -s syscall-counts.py
 
 raw_syscalls__sys_enter     1 00840.847582083     7506 perf                  id=1, args=
 raw_syscalls__sys_enter     1 00840.847595764     7506 perf                  id=1, args=
@@ -216,7 +216,7 @@ import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -279,7 +279,7 @@ import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -315,7 +315,7 @@ def print_syscall_totals():
 
 The script can be run just as before:
 
-  # perf trace -s syscall-counts.py
+  # perf script -s syscall-counts.py
 
 So those are the essential steps in writing and running a script.  The
 process can be generalized to any tracepoint or set of tracepoints
@@ -324,17 +324,17 @@ interested in by looking at the list of available events shown by
 'perf list' and/or look in /sys/kernel/debug/tracing events for
 detailed event and field info, record the corresponding trace data
 using 'perf record', passing it the list of interesting events,
-generate a skeleton script using 'perf trace -g python' and modify the
+generate a skeleton script using 'perf script -g python' and modify the
 code to aggregate and display it for your particular needs.
 
 After you've done that you may end up with a general-purpose script
 that you want to keep around and have available for future use.  By
 writing a couple of very simple shell scripts and putting them in the
 right place, you can have your script listed alongside the other
-scripts listed by the 'perf trace -l' command e.g.:
+scripts listed by the 'perf script -l' command e.g.:
 
 ----
-root@tropicana:~# perf trace -l
+root@tropicana:~# perf script -l
 List of available trace scripts:
   workqueue-stats                      workqueue stats (ins/exe/create/destroy)
   wakeup-latency                       system-wide min/max/avg wakeup latency
@@ -365,14 +365,14 @@ perf record -a -e raw_syscalls:sys_enter
 The 'report' script is also a shell script with the same base name as
 your script, but with -report appended.  It should also be located in
 the perf/scripts/python/bin directory.  In that script, you write the
-'perf trace -s' command-line needed for running your script:
+'perf script -s' command-line needed for running your script:
 
 ----
 # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
 
 #!/bin/bash
 # description: system-wide syscall counts
-perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
 ----
 
 Note that the location of the Python script given in the shell script
@@ -390,17 +390,17 @@ total 32
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
 drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
--rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py
-drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util
 -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
 ----
 
 Once you've done that (don't forget to do a new 'make install',
-otherwise your script won't show up at run-time), 'perf trace -l'
+otherwise your script won't show up at run-time), 'perf script -l'
 should show a new entry for your script:
 
 ----
-root@tropicana:~# perf trace -l
+root@tropicana:~# perf script -l
 List of available trace scripts:
   workqueue-stats                      workqueue stats (ins/exe/create/destroy)
   wakeup-latency                       system-wide min/max/avg wakeup latency
@@ -409,19 +409,19 @@ List of available trace scripts:
   syscall-counts                       system-wide syscall counts
 ----
 
-You can now perform the record step via 'perf trace record':
+You can now perform the record step via 'perf script record':
 
-  # perf trace record syscall-counts
+  # perf script record syscall-counts
 
-and display the output using 'perf trace report':
+and display the output using 'perf script report':
 
-  # perf trace report syscall-counts
+  # perf script report syscall-counts
 
 STARTER SCRIPTS
 ---------------
 
 You can quickly get started writing a script for a particular set of
-trace data by generating a skeleton script using 'perf trace -g
+trace data by generating a skeleton script using 'perf script -g
 python' in the same directory as an existing perf.data trace file.
 That will generate a starter script containing a handler for each of
 the event types in the trace file; it simply prints every available
@@ -430,13 +430,13 @@ field for each event in the trace file.
 You can also look at the existing scripts in
 ~/libexec/perf-core/scripts/python for typical examples showing how to
 do basic things like aggregate event data, print results, etc.  Also,
-the check-perf-trace.py script, while not interesting for its results,
+the check-perf-script.py script, while not interesting for its results,
 attempts to exercise all of the main scripting features.
 
 EVENT HANDLERS
 --------------
 
-When perf trace is invoked using a trace script, a user-defined
+When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
 ignored (or passed to a 'trace_handled' function, see below) and the
@@ -510,7 +510,7 @@ write a useful trace script.  The sections below cover the rest.
 SCRIPT LAYOUT
 -------------
 
-Every perf trace Python script should start by setting up a Python
+Every perf script Python script should start by setting up a Python
 module search path and 'import'ing a few support modules (see module
 descriptions below):
 
@@ -519,7 +519,7 @@ descriptions below):
  import sys
 
  sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-             '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+             '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
  from perf_trace_context import *
  from Core import *
@@ -559,15 +559,15 @@ def trace_unhandled(event_name, context, common_cpu, common_secs,
 ----
 
 The remaining sections provide descriptions of each of the available
-built-in perf trace Python modules and their associated functions.
+built-in perf script Python modules and their associated functions.
 
 AVAILABLE MODULES AND FUNCTIONS
 -------------------------------
 
 The following sections describe the functions and variables available
-via the various perf trace Python modules.  To use the functions and
+via the various perf script Python modules.  To use the functions and
 variables from the given module, add the corresponding 'from XXXX
-import' line to your perf trace script.
+import' line to your perf script script.
 
 Core.py Module
 ~~~~~~~~~~~~~~
@@ -610,7 +610,7 @@ argument.
 Util.py Module
 ~~~~~~~~~~~~~~
 
-Various utility functions for use with perf trace:
+Various utility functions for use with perf script:
 
   nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
   nsecs_secs(nsecs) - returns whole secs portion given nsecs
@@ -620,4 +620,4 @@ Various utility functions for use with perf trace:
 
 SEE ALSO
 --------
-linkperf:perf-trace[1]
+linkperf:perf-script[1]
similarity index 62%
rename from tools/perf/Documentation/perf-trace.txt
rename to tools/perf/Documentation/perf-script.txt
index 26aff6bf9e500d0d6699e968a789db651deb9722..29ad94293cd26cf033c4962352f1518b45975652 100644 (file)
@@ -1,71 +1,71 @@
-perf-trace(1)
+perf-script(1)
 =============
 
 NAME
 ----
-perf-trace - Read perf.data (created by perf record) and display trace output
+perf-script - Read perf.data (created by perf record) and display trace output
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [<options>]
-'perf trace' [<options>] record <script> [<record-options>] <command>
-'perf trace' [<options>] report <script> [script-args]
-'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command>
-'perf trace' [<options>] <top-script> [script-args]
+'perf script' [<options>]
+'perf script' [<options>] record <script> [<record-options>] <command>
+'perf script' [<options>] report <script> [script-args]
+'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
+'perf script' [<options>] <top-script> [script-args]
 
 DESCRIPTION
 -----------
 This command reads the input file and displays the trace recorded.
 
-There are several variants of perf trace:
+There are several variants of perf script:
 
-  'perf trace' to see a detailed trace of the workload that was
+  'perf script' to see a detailed trace of the workload that was
   recorded.
 
   You can also run a set of pre-canned scripts that aggregate and
   summarize the raw trace data in various ways (the list of scripts is
-  available via 'perf trace -l').  The following variants allow you to
+  available via 'perf script -l').  The following variants allow you to
   record and run those scripts:
 
-  'perf trace record <script> <command>' to record the events required
-  for 'perf trace report'.  <script> is the name displayed in the
-  output of 'perf trace --list' i.e. the actual script name minus any
+  'perf script record <script> <command>' to record the events required
+  for 'perf script report'.  <script> is the name displayed in the
+  output of 'perf script --list' i.e. the actual script name minus any
   language extension.  If <command> is not specified, the events are
   recorded using the -a (system-wide) 'perf record' option.
 
-  'perf trace report <script> [args]' to run and display the results
+  'perf script report <script> [args]' to run and display the results
   of <script>.  <script> is the name displayed in the output of 'perf
   trace --list' i.e. the actual script name minus any language
-  extension.  The perf.data output from a previous run of 'perf trace
+  extension.  The perf.data output from a previous run of 'perf script
   record <script>' is used and should be present for this command to
   succeed.  [args] refers to the (mainly optional) args expected by
   the script.
 
-  'perf trace <script> <required-script-args> <command>' to both
+  'perf script <script> <required-script-args> <command>' to both
   record the events required for <script> and to run the <script>
   using 'live-mode' i.e. without writing anything to disk.  <script>
-  is the name displayed in the output of 'perf trace --list' i.e. the
+  is the name displayed in the output of 'perf script --list' i.e. the
   actual script name minus any language extension.  If <command> is
   not specified, the events are recorded using the -a (system-wide)
   'perf record' option.  If <script> has any required args, they
   should be specified before <command>.  This mode doesn't allow for
   optional script args to be specified; if optional script args are
-  desired, they can be specified using separate 'perf trace record'
-  and 'perf trace report' commands, with the stdout of the record step
+  desired, they can be specified using separate 'perf script record'
+  and 'perf script report' commands, with the stdout of the record step
   piped to the stdin of the report script, using the '-o -' and '-i -'
   options of the corresponding commands.
 
-  'perf trace <top-script>' to both record the events required for
+  'perf script <top-script>' to both record the events required for
   <top-script> and to run the <top-script> using 'live-mode'
   i.e. without writing anything to disk.  <top-script> is the name
-  displayed in the output of 'perf trace --list' i.e. the actual
+  displayed in the output of 'perf script --list' i.e. the actual
   script name minus any language extension; a <top-script> is defined
   as any script name ending with the string 'top'.
 
-  [<record-options>] can be passed to the record steps of 'perf trace
+  [<record-options>] can be passed to the record steps of 'perf script
   record' and 'live-mode' variants; this isn't possible however for
-  <top-script> 'live-mode' or 'perf trace report' variants.
+  <top-script> 'live-mode' or 'perf script report' variants.
 
   See the 'SEE ALSO' section for links to language-specific
   information on how to write and run your own trace scripts.
@@ -76,7 +76,7 @@ OPTIONS
        Any command you can specify in a shell.
 
 -D::
 --dump-raw-trace=::
         Display verbose dump of the trace data.
 
 -L::
@@ -95,7 +95,7 @@ OPTIONS
 
 -g::
 --gen-script=::
-        Generate perf-trace.[ext] starter script for given language,
+        Generate perf-script.[ext] starter script for given language,
         using current perf.data.
 
 -a::
@@ -104,8 +104,15 @@ OPTIONS
         normally don't - this option allows the latter to be run in
         system-wide mode.
 
+-i::
+--input=::
+        Input file name.
+
+-d::
+--debug-mode::
+        Run various checks, such as verifying sample ordering and detecting lost events.
 
 SEE ALSO
 --------
-linkperf:perf-record[1], linkperf:perf-trace-perl[1],
-linkperf:perf-trace-python[1]
+linkperf:perf-record[1], linkperf:perf-script-perl[1],
+linkperf:perf-script-python[1]
index 4b3a2d46b4378607f5195d12328646f5b1d7a638..b6da7affbbeeb82533387e9ba3f7c788d3e28dec 100644 (file)
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
 SYNOPSIS
 --------
 [verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
 
 DESCRIPTION
 -----------
@@ -35,24 +35,54 @@ OPTIONS
         child tasks do not inherit counters
 -p::
 --pid=<pid>::
-        stat events on existing pid
+        stat events on existing process id
+
+-t::
+--tid=<tid>::
+        stat events on existing thread id
+
 
 -a::
-        system-wide collection
+--all-cpus::
+        system-wide collection from all CPUs
 
 -c::
-        scale counter values
+--scale::
+       scale/normalize counter values
+
+-r::
+--repeat=<n>::
+       repeat command and print average + stddev (max: 100)
 
 -B::
+--big-num::
         print large numbers with thousands' separators according to locale
 
 -C::
 --cpu=::
-Count only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode, this option is ignored. The -a option is still necessary
 to activate system-wide monitoring. Default is to count on all CPUs.
 
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
+This option is only valid in system-wide mode.
+
+-n::
+--null::
+        null run - don't start any counters
+
+-v::
+--verbose::
+        be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
+
 EXAMPLES
 --------
 
index 1c4b5f5b7f71ec7047be7f02a369e8e27fc8710d..2c3b462f64b00531b4c8a7e5fc5cb6224dfc88d6 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command does assorted sanity tests, initially thru linked routines but
+This command does assorted sanity tests, initially through linked routines but
 also will look for a directory with more tests in the form of scripts.
 
 OPTIONS
index 4b1788355ecac3d305bf72e6f58d5a477e08ba7b..d7b79e2ba2adbe2cc0cb6468a9d84d6b73b8ed0f 100644 (file)
@@ -38,6 +38,8 @@ OPTIONS
 --process::
         Select the processes to display, by name or PID
 
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
 
 SEE ALSO
 --------
index 1f9687663f2a9cd62d6cff9f597395523b3da931..f6eb1cdafb7758162463b0ca8a25525f94c44d19 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command generates and displays a performance counter profile in realtime.
+This command generates and displays a performance counter profile in real time.
 
 
 OPTIONS
@@ -27,8 +27,8 @@ OPTIONS
 
 -C <cpu-list>::
 --cpu=<cpu>::
-Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 Default is to monitor all CPUS.
 
 -d <seconds>::
@@ -50,6 +50,10 @@ Default is to monitor all CPUS.
 --count-filter=<count>::
        Only display functions with more events than this.
 
+-g::
+--group::
+        Put the counters into a counter group.
+
 -F <freq>::
 --freq=<freq>::
        Profile at this frequency.
@@ -68,7 +72,11 @@ Default is to monitor all CPUS.
 
 -p <pid>::
 --pid=<pid>::
-       Profile events on existing pid.
+       Profile events on existing Process ID.
+
+-t <tid>::
+--tid=<tid>::
+        Profile events on existing thread ID.
 
 -r <priority>::
 --realtime=<priority>::
@@ -78,6 +86,18 @@ Default is to monitor all CPUS.
 --sym-annotate=<symbol>::
         Annotate this symbol.
 
+-K::
+--hide_kernel_symbols::
+        Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+        Hide user symbols.
+
+-D::
+--dump-symtab::
+        Dump the symbol table used for profiling.
+
 -v::
 --verbose::
        Be more verbose (show counter open errors, etc).
index 8c7fc0c8f0b8cd0a77ddb58fb5b712379276ebc8..c12659d8cb26fcefd8449581921aef2962cb651c 100644 (file)
@@ -7,6 +7,7 @@ include/linux/stringify.h
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
+arch/*/lib/memcpy*.S
 include/linux/poison.h
 include/linux/magic.h
 include/linux/hw_breakpoint.h
index d1db0f676a4bf14850fa0264e78fe3d482d376dc..ac6692cf550878faa3ca6c921c804cdd3dbfbe77 100644 (file)
@@ -185,7 +185,10 @@ ifeq ($(ARCH),i386)
         ARCH := x86
 endif
 ifeq ($(ARCH),x86_64)
+       RAW_ARCH := x86_64
         ARCH := x86
+       ARCH_CFLAGS := -DARCH_X86_64
+       ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
 endif
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h
 LIB_H += util/include/linux/rbtree.h
 LIB_H += util/include/linux/string.h
 LIB_H += util/include/linux/types.h
+LIB_H += util/include/linux/linkage.h
 LIB_H += util/include/asm/asm-offsets.h
 LIB_H += util/include/asm/bug.h
 LIB_H += util/include/asm/byteorder.h
@@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h
 LIB_H += util/include/asm/system.h
 LIB_H += util/include/asm/uaccess.h
 LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
 LIB_H += perf.h
 LIB_H += util/cache.h
 LIB_H += util/callchain.h
@@ -417,6 +423,7 @@ LIB_H += util/probe-finder.h
 LIB_H += util/probe-event.h
 LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
+LIB_H += $(ARCH_INCLUDE)
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -472,6 +479,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
 # Benchmark modules
 BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
 BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -485,7 +495,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-report.o
 BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
 BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
 BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
 BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
@@ -507,7 +517,7 @@ PERFLIBS = $(LIB_FILE)
 -include config.mak
 
 ifndef NO_DWARF
-FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
+FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
 ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
        msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
        NO_DWARF := 1
@@ -554,7 +564,7 @@ ifndef NO_DWARF
 ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
        msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
 else
-       BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT
+       BASIC_CFLAGS += -DDWARF_SUPPORT
        EXTLIBS += -lelf -ldw
        LIB_OBJS += $(OUTPUT)util/probe-finder.o
 endif # PERF_HAVE_DWARF_REGS
@@ -891,13 +901,14 @@ prefix_SQ = $(subst ','\'',$(prefix))
 SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
 
-LIBS = $(PERFLIBS) $(EXTLIBS)
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
 
 BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
        $(COMPAT_CFLAGS)
 LIB_OBJS += $(COMPAT_OBJS)
 
 ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_CFLAGS += $(ARCH_CFLAGS)
 ALL_LDFLAGS += $(BASIC_LDFLAGS)
 
 export TAR INSTALL DESTDIR SHELL_PATH
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644 (file)
index 0000000..a72e36c
--- /dev/null
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc)              \
+       extern void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644 (file)
index 0000000..d588b87
--- /dev/null
@@ -0,0 +1,4 @@
+
+MEMCPY_FN(__memcpy,
+       "x86-64-unrolled",
+       "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644 (file)
index 0000000..a57b66e
--- /dev/null
@@ -0,0 +1,2 @@
+
+#include "../../../arch/x86/lib/memcpy_64.S"
index 38dae7465142fdb86037cbae95a85a3ede9e24ed..db82021f4b91c7172a2fe43b4902e67f022af7b9 100644 (file)
@@ -12,6 +12,7 @@
 #include "../util/parse-options.h"
 #include "../util/header.h"
 #include "bench.h"
+#include "mem-memcpy-arch.h"
 
 #include <stdio.h>
 #include <stdlib.h>
 
 static const char      *length_str     = "1MB";
 static const char      *routine        = "default";
-static bool            use_clock       = false;
+static bool            use_clock;
 static int             clock_fd;
+static bool            only_prefault;
+static bool            no_prefault;
 
 static const struct option options[] = {
        OPT_STRING('l', "length", &length_str, "1MB",
@@ -34,19 +37,33 @@ static const struct option options[] = {
                    "Specify routine to copy"),
        OPT_BOOLEAN('c', "clock", &use_clock,
                    "Use CPU clock for measuring"),
+       OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+                   "Show only the result with page faults before memcpy()"),
+       OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+                   "Show only the result without page faults before memcpy()"),
        OPT_END()
 };
 
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+
 struct routine {
        const char *name;
        const char *desc;
-       void * (*fn)(void *dst, const void *src, size_t len);
+       memcpy_t fn;
 };
 
 struct routine routines[] = {
        { "default",
          "Default memcpy() provided by glibc",
          memcpy },
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memcpy-x86-64-asm-def.h"
+#undef MEMCPY_FN
+
+#endif
+
        { NULL,
          NULL,
          NULL   }
@@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts)
                (double)ts->tv_usec / (double)1000000;
 }
 
+static void alloc_mem(void **dst, void **src, size_t length)
+{
+       *dst = zalloc(length);
+       if (!*dst)
+               die("memory allocation failed - maybe length is too large?\n");
+
+       *src = zalloc(length);
+       if (!*src)
+               die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
+{
+       u64 clock_start = 0ULL, clock_end = 0ULL;
+       void *src = NULL, *dst = NULL;
+
+       alloc_mem(&src, &dst, len);
+
+       if (prefault)
+               fn(dst, src, len);
+
+       clock_start = get_clock();
+       fn(dst, src, len);
+       clock_end = get_clock();
+
+       free(src);
+       free(dst);
+       return clock_end - clock_start;
+}
+
+static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
+{
+       struct timeval tv_start, tv_end, tv_diff;
+       void *src = NULL, *dst = NULL;
+
+       alloc_mem(&src, &dst, len);
+
+       if (prefault)
+               fn(dst, src, len);
+
+       BUG_ON(gettimeofday(&tv_start, NULL));
+       fn(dst, src, len);
+       BUG_ON(gettimeofday(&tv_end, NULL));
+
+       timersub(&tv_end, &tv_start, &tv_diff);
+
+       free(src);
+       free(dst);
+       return (double)((double)len / timeval2double(&tv_diff));
+}
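
Both timing helpers take a prefault flag because the first copy into freshly zalloc'd buffers also pays for the page faults that map them; doing one throw-away copy beforehand leaves only memcpy() itself inside the timed region. A rough, self-contained illustration of the effect, assuming an arbitrary 64 MB length and using plain glibc calloc/memcpy/gettimeofday:

----
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/* Time one memcpy() of 'len' bytes, optionally touching the pages first. */
static double copy_bps(size_t len, int prefault)
{
	struct timeval start, end;
	double secs;
	void *src = calloc(1, len);
	void *dst = calloc(1, len);

	if (!src || !dst) {
		perror("calloc");
		exit(1);
	}

	if (prefault)			/* map the pages before timing */
		memcpy(dst, src, len);

	gettimeofday(&start, NULL);
	memcpy(dst, src, len);
	gettimeofday(&end, NULL);

	secs = (end.tv_sec - start.tv_sec) +
	       (end.tv_usec - start.tv_usec) / 1e6;
	free(src);
	free(dst);
	return (double)len / secs;
}

int main(void)
{
	size_t len = 64 * 1024 * 1024;	/* arbitrary 64 MB buffers */

	printf("cold copy: %12.0f B/sec\n", copy_bps(len, 0)); /* pays faults */
	printf("warm copy: %12.0f B/sec\n", copy_bps(len, 1)); /* copy only  */
	return 0;
}
----

Comparing the two figures shows how much of the "cold" number is fault overhead rather than copy bandwidth, which is what the new --only-prefault/--no-prefault switches expose.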
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {                                      \
+               if (x < K)                                      \
+                       printf(" %14lf B/Sec", x);              \
+               else if (x < K * K)                             \
+                       printf(" %14lfd KB/Sec", x / K);        \
+               else if (x < K * K * K)                         \
+                       printf(" %14lf MB/Sec", x / K / K);     \
+               else                                            \
+                       printf(" %14lf GB/Sec", x / K / K / K); \
+       } while (0)
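
print_bps() picks a human-readable unit by comparing against successive powers of K; written out as a plain function (assuming K is 1024, matching the divisors in the code it replaces), it behaves like this:

----
#include <stdio.h>

#define K 1024	/* assumed to match the benchmark's definition */

static void print_bps(double x)
{
	if (x < K)
		printf(" %14lf B/Sec", x);
	else if (x < K * K)
		printf(" %14lf KB/Sec", x / K);
	else if (x < K * K * K)
		printf(" %14lf MB/Sec", x / K / K);
	else
		printf(" %14lf GB/Sec", x / K / K / K);
}

int main(void)
{
	print_bps(512.0);	/* stays in B/Sec  */
	printf("\n");
	print_bps(3.5e9);	/* scaled to GB/Sec */
	printf("\n");
	return 0;
}
----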
+
 int bench_mem_memcpy(int argc, const char **argv,
                     const char *prefix __used)
 {
        int i;
-       void *dst, *src;
-       size_t length;
-       double bps = 0.0;
-       struct timeval tv_start, tv_end, tv_diff;
-       u64 clock_start, clock_end, clock_diff;
+       size_t len;
+       double result_bps[2];
+       u64 result_clock[2];
 
-       clock_start = clock_end = clock_diff = 0ULL;
        argc = parse_options(argc, argv, options,
                             bench_mem_memcpy_usage, 0);
 
-       tv_diff.tv_sec = 0;
-       tv_diff.tv_usec = 0;
-       length = (size_t)perf_atoll((char *)length_str);
+       if (use_clock)
+               init_clock();
+
+       len = (size_t)perf_atoll((char *)length_str);
 
-       if ((s64)length <= 0) {
+       result_clock[0] = result_clock[1] = 0ULL;
+       result_bps[0] = result_bps[1] = 0.0;
+
+       if ((s64)len <= 0) {
                fprintf(stderr, "Invalid length:%s\n", length_str);
                return 1;
        }
 
+       /* specifying both is the same as specifying neither */
+       if (only_prefault && no_prefault)
+               only_prefault = no_prefault = false;
+
        for (i = 0; routines[i].name; i++) {
                if (!strcmp(routines[i].name, routine))
                        break;
@@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv,
                return 1;
        }
 
-       dst = zalloc(length);
-       if (!dst)
-               die("memory allocation failed - maybe length is too large?\n");
-
-       src = zalloc(length);
-       if (!src)
-               die("memory allocation failed - maybe length is too large?\n");
-
-       if (bench_format == BENCH_FORMAT_DEFAULT) {
-               printf("# Copying %s Bytes from %p to %p ...\n\n",
-                      length_str, src, dst);
-       }
-
-       if (use_clock) {
-               init_clock();
-               clock_start = get_clock();
-       } else {
-               BUG_ON(gettimeofday(&tv_start, NULL));
-       }
-
-       routines[i].fn(dst, src, length);
+       if (bench_format == BENCH_FORMAT_DEFAULT)
+               printf("# Copying %s Bytes ...\n\n", length_str);
 
-       if (use_clock) {
-               clock_end = get_clock();
-               clock_diff = clock_end - clock_start;
+       if (!only_prefault && !no_prefault) {
+               /* show both of results */
+               if (use_clock) {
+                       result_clock[0] =
+                               do_memcpy_clock(routines[i].fn, len, false);
+                       result_clock[1] =
+                               do_memcpy_clock(routines[i].fn, len, true);
+               } else {
+                       result_bps[0] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, false);
+                       result_bps[1] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, true);
+               }
        } else {
-               BUG_ON(gettimeofday(&tv_end, NULL));
-               timersub(&tv_end, &tv_start, &tv_diff);
-               bps = (double)((double)length / timeval2double(&tv_diff));
+               if (use_clock) {
+                       result_clock[pf] =
+                               do_memcpy_clock(routines[i].fn,
+                                               len, only_prefault);
+               } else {
+                       result_bps[pf] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, only_prefault);
+               }
        }
 
        switch (bench_format) {
        case BENCH_FORMAT_DEFAULT:
-               if (use_clock) {
-                       printf(" %14lf Clock/Byte\n",
-                              (double)clock_diff / (double)length);
-               } else {
-                       if (bps < K)
-                               printf(" %14lf B/Sec\n", bps);
-                       else if (bps < K * K)
-                               printf(" %14lfd KB/Sec\n", bps / 1024);
-                       else if (bps < K * K * K)
-                               printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
-                       else {
-                               printf(" %14lf GB/Sec\n",
-                                      bps / 1024 / 1024 / 1024);
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte\n",
+                                       (double)result_clock[0]
+                                       / (double)len);
+                               printf(" %14lf Clock/Byte (with prefault)\n",
+                                       (double)result_clock[1]
+                                       / (double)len);
+                       } else {
+                               print_bps(result_bps[0]);
+                               printf("\n");
+                               print_bps(result_bps[1]);
+                               printf(" (with prefault)\n");
                        }
+               } else {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte",
+                                       (double)result_clock[pf]
+                                       / (double)len);
+                       } else
+                               print_bps(result_bps[pf]);
+
+                       printf("%s\n", only_prefault ? " (with prefault)" : "");
                }
                break;
        case BENCH_FORMAT_SIMPLE:
-               if (use_clock) {
-                       printf("%14lf\n",
-                              (double)clock_diff / (double)length);
-               } else
-                       printf("%lf\n", bps);
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf("%lf %lf\n",
+                                       (double)result_clock[0] / (double)len,
+                                       (double)result_clock[1] / (double)len);
+                       } else {
+                               printf("%lf %lf\n",
+                                       result_bps[0], result_bps[1]);
+                       }
+               } else {
+                       if (use_clock) {
+                               printf("%lf\n", (double)result_clock[pf]
+                                       / (double)len);
+                       } else
+                               printf("%lf\n", result_bps[pf]);
+               }
                break;
        default:
                /* reaching this means there's some disaster: */
index 6d5604d8df9599acb55d87017f5d58e19d906395..c056cdc0691258b159665ca3e8c74d2963543ccf 100644 (file)
@@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
        return hist_entry__inc_addr_samples(he, al->addr);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
        struct addr_location al;
-       struct sample_data data;
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                pr_warning("problem processing %d event, skipping it.\n",
                           event->header.type);
                return -1;
@@ -375,6 +375,8 @@ static struct perf_event_ops event_ops = {
        .mmap   = event__process_mmap,
        .comm   = event__process_comm,
        .fork   = event__process_task,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 static int __cmd_annotate(void)
@@ -382,7 +384,7 @@ static int __cmd_annotate(void)
        int ret;
        struct perf_session *session;
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
index c49837de7d3f74eb9254d83e4f6457b11ae38c17..5af32ae9031ec83fc2db926df9360d1ce8f1620c 100644 (file)
@@ -38,7 +38,8 @@ static int __cmd_buildid_list(void)
 {
        struct perf_session *session;
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false,
+                                   &build_id__mark_dso_hit_ops);
        if (session == NULL)
                return -1;
 
index fca1d4402910ab13a6f7aa45299e31c0289cb27c..3153e492dbcc29e1593b6df29357424dd012da99 100644 (file)
@@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self,
        return -ENOMEM;
 }
 
-static int diff__process_sample_event(event_t *event, struct perf_session *session)
+static int diff__process_sample_event(event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_session *session)
 {
        struct addr_location al;
-       struct sample_data data = { .period = 1, };
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                pr_warning("problem processing %d event, skipping it.\n",
                           event->header.type);
                return -1;
@@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
        if (al.filtered || al.sym == NULL)
                return 0;
 
-       if (hists__add_entry(&session->hists, &al, data.period)) {
+       if (hists__add_entry(&session->hists, &al, sample->period)) {
                pr_warning("problem incrementing symbol period, skipping event\n");
                return -1;
        }
 
-       session->hists.stats.total_period += data.period;
+       session->hists.stats.total_period += sample->period;
        return 0;
 }
 
@@ -60,6 +61,8 @@ static struct perf_event_ops event_ops = {
        .exit   = event__process_task,
        .fork   = event__process_task,
        .lost   = event__process_lost,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -141,8 +144,8 @@ static int __cmd_diff(void)
        int ret, i;
        struct perf_session *session[2];
 
-       session[0] = perf_session__new(input_old, O_RDONLY, force, false);
-       session[1] = perf_session__new(input_new, O_RDONLY, force, false);
+       session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
+       session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
        if (session[0] == NULL || session[1] == NULL)
                return -ENOMEM;
 
@@ -173,7 +176,7 @@ static const char * const diff_usage[] = {
 static const struct option options[] = {
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
-       OPT_BOOLEAN('m', "displacement", &show_displacement,
+       OPT_BOOLEAN('M', "displacement", &show_displacement,
                    "Show position displacement relative to baseline"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
@@ -191,6 +194,8 @@ static const struct option options[] = {
        OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
                   "separator for columns, no spaces will be added between "
                   "columns '.' is reserved."),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
 
index 8e3e47b064cea7ddea4e98f0fb98202b35643632..0c78ffa7bf675f46c9e631d9fa8d51fbc71aded4 100644 (file)
@@ -16,8 +16,8 @@
 static char            const *input_name = "-";
 static bool            inject_build_ids;
 
-static int event__repipe(event_t *event __used,
-                        struct perf_session *session __used)
+static int event__repipe_synth(event_t *event,
+                              struct perf_session *session __used)
 {
        uint32_t size;
        void *buf = event;
@@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used,
        return 0;
 }
 
-static int event__repipe_mmap(event_t *self, struct perf_session *session)
+static int event__repipe(event_t *event, struct sample_data *sample __used,
+                        struct perf_session *session)
+{
+       return event__repipe_synth(event, session);
+}
+
+static int event__repipe_mmap(event_t *self, struct sample_data *sample,
+                             struct perf_session *session)
 {
        int err;
 
-       err = event__process_mmap(self, session);
-       event__repipe(self, session);
+       err = event__process_mmap(self, sample, session);
+       event__repipe(self, sample, session);
 
        return err;
 }
 
-static int event__repipe_task(event_t *self, struct perf_session *session)
+static int event__repipe_task(event_t *self, struct sample_data *sample,
+                             struct perf_session *session)
 {
        int err;
 
-       err = event__process_task(self, session);
-       event__repipe(self, session);
+       err = event__process_task(self, sample, session);
+       event__repipe(self, sample, session);
 
        return err;
 }
@@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self,
 {
        int err;
 
-       event__repipe(self, session);
+       event__repipe_synth(self, session);
        err = event__process_tracing_data(self, session);
 
        return err;
@@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
        return 0;
 }
 
-static int event__inject_buildid(event_t *event, struct perf_session *session)
+static int event__inject_buildid(event_t *event, struct sample_data *sample,
+                                struct perf_session *session)
 {
        struct addr_location al;
        struct thread *thread;
@@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session)
        }
 
 repipe:
-       event__repipe(event, session);
+       event__repipe(event, sample, session);
        return 0;
 }
 
@@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = {
        .read           = event__repipe,
        .throttle       = event__repipe,
        .unthrottle     = event__repipe,
-       .attr           = event__repipe,
-       .event_type     = event__repipe,
-       .tracing_data   = event__repipe,
-       .build_id       = event__repipe,
+       .attr           = event__repipe_synth,
+       .event_type     = event__repipe_synth,
+       .tracing_data   = event__repipe_synth,
+       .build_id       = event__repipe_synth,
 };
 
 extern volatile int session_done;
@@ -187,7 +196,7 @@ static int __cmd_inject(void)
                inject_ops.tracing_data = event__repipe_tracing_data;
        }
 
-       session = perf_session__new(input_name, O_RDONLY, false, true);
+       session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
        if (session == NULL)
                return -ENOMEM;
 
index 31f60a2535e0ec95b60e6b90e3e24818fa0dd972..def7ddc2fd4fbc1b1c57795f4c10c519729b035d 100644 (file)
@@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data,
        }
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
-       struct thread *thread;
+       struct thread *thread = perf_session__findnew(session, event->ip.pid);
 
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = 1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, event->ip.pid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
@@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
        dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-       process_raw_event(event, data.raw_data, data.cpu,
-                         data.time, thread);
+       process_raw_event(event, sample->raw_data, sample->cpu,
+                         sample->time, thread);
 
        return 0;
 }
@@ -492,7 +481,8 @@ static void sort_result(void)
 static int __cmd_kmem(void)
 {
        int err = -EINVAL;
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -747,6 +737,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
index 821c1586a22b7da92cd732ad1a4ee4e05b037085..b9c6e54329713e326d74da9b08164e562e521d03 100644 (file)
@@ -834,22 +834,18 @@ static void dump_info(void)
                die("Unknown type of information\n");
 }
 
-static int process_sample_event(event_t *self, struct perf_session *s)
+static int process_sample_event(event_t *self, struct sample_data *sample,
+                               struct perf_session *s)
 {
-       struct sample_data data;
-       struct thread *thread;
+       struct thread *thread = perf_session__findnew(s, sample->tid);
 
-       bzero(&data, sizeof(data));
-       event__parse_sample(self, s->sample_type, &data);
-
-       thread = perf_session__findnew(s, data.tid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                        self->header.type);
                return -1;
        }
 
-       process_raw_event(data.raw_data, data.cpu, data.time, thread);
+       process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
 
        return 0;
 }
@@ -862,7 +858,7 @@ static struct perf_event_ops eops = {
 
 static int read_events(void)
 {
-       session = perf_session__new(input_name, O_RDONLY, 0, false);
+       session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
        if (!session)
                die("Initializing perf session failed\n");
 
@@ -947,6 +943,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
@@ -982,9 +981,9 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
                                usage_with_options(report_usage, report_options);
                }
                __cmd_report();
-       } else if (!strcmp(argv[0], "trace")) {
-               /* Aliased to 'perf trace' */
-               return cmd_trace(argc, argv, prefix);
+       } else if (!strcmp(argv[0], "script")) {
+               /* Aliased to 'perf script' */
+               return cmd_script(argc, argv, prefix);
        } else if (!strcmp(argv[0], "info")) {
                if (argc) {
                        argc = parse_options(argc, argv,
index 564491fa18b27838dd79125954bc744f51f7fe2c..50efbd509b8f1950c09436c866d8f950b70975fb 100644 (file)
@@ -36,6 +36,7 @@ static int                    *fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static u64                     user_interval                   = ULLONG_MAX;
 static u64                     default_interval                =      0;
+static u64                     sample_type;
 
 static int                     nr_cpus                         =      0;
 static unsigned int            page_size;
@@ -48,6 +49,7 @@ static const char             *output_name                    = "perf.data";
 static int                     group                           =      0;
 static int                     realtime_prio                   =      0;
 static bool                    raw_samples                     =  false;
+static bool                    sample_id_all_avail             =   true;
 static bool                    system_wide                     =  false;
 static pid_t                   target_pid                      =     -1;
 static pid_t                   target_tid                      =     -1;
@@ -60,7 +62,9 @@ static bool                   call_graph                      =  false;
 static bool                    inherit_stat                    =  false;
 static bool                    no_samples                      =  false;
 static bool                    sample_address                  =  false;
+static bool                    sample_time                     =  false;
 static bool                    no_buildid                      =  false;
+static bool                    no_buildid_cache                =  false;
 
 static long                    samples                         =      0;
 static u64                     bytes_written                   =      0;
@@ -128,6 +132,7 @@ static void write_output(void *buf, size_t size)
 }
 
 static int process_synthesized_event(event_t *event,
+                                    struct sample_data *sample __used,
                                     struct perf_session *self __used)
 {
        write_output(event, event->header.size);
@@ -238,6 +243,19 @@ static void create_counter(int counter, int cpu)
                u64 time_running;
                u64 id;
        } read_data;
+       /*
+        * Check if parse_single_tracepoint_event has already asked for
+        * PERF_SAMPLE_TIME.
+        *
+        * XXX this is kludgy, but it is a short-term fix for problems
+        * introduced by eac23d1c, which broke 'perf script' by producing
+        * different sample_types when multiple tracepoint events are used
+        * with a perf binary that tries to use sample_id_all on an older
+        * kernel.
+        *
+        * We need to move counter creation to perf_session, support
+        * different sample_types, etc.
+        */
+       bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
 
        attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -280,6 +298,10 @@ static void create_counter(int counter, int cpu)
        if (system_wide)
                attr->sample_type       |= PERF_SAMPLE_CPU;
 
+       if (sample_id_all_avail &&
+           (sample_time || system_wide || !no_inherit || cpu_list))
+               attr->sample_type       |= PERF_SAMPLE_TIME;
+
        if (raw_samples) {
                attr->sample_type       |= PERF_SAMPLE_TIME;
                attr->sample_type       |= PERF_SAMPLE_RAW;
@@ -293,6 +315,8 @@ static void create_counter(int counter, int cpu)
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
+retry_sample_id:
+       attr->sample_id_all = sample_id_all_avail ? 1 : 0;
 
        for (thread_index = 0; thread_index < thread_num; thread_index++) {
 try_again:
@@ -309,6 +333,15 @@ try_again:
                        else if (err ==  ENODEV && cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
+                       } else if (err == EINVAL && sample_id_all_avail) {
+                               /*
+                                * Old kernel, no attr->sample_id_type_all field
+                                */
+                               sample_id_all_avail = false;
+                               if (!sample_time && !raw_samples && !time_needed)
+                                       attr->sample_type &= ~PERF_SAMPLE_TIME;
+
+                               goto retry_sample_id;
                        }
 
                        /*
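
The retry_sample_id path is a runtime feature probe: set the new attribute bit optimistically and, if sys_perf_event_open() rejects the attribute with EINVAL, clear it (together with the PERF_SAMPLE_TIME it implied, when nothing else asked for timestamps) and retry, so the same binary still runs on kernels without sample_id_all. A self-contained sketch of that probe, assuming a linux/perf_event.h new enough to declare the bit; the software cpu-clock event is an arbitrary choice:

----
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* glibc has no wrapper for this syscall */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_id_all = 1;		/* the new capability, tried first */

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0 && errno == EINVAL) {
		/* old kernel: it rejects the unknown bit, retry without it */
		attr.sample_id_all = 0;
		fd = perf_event_open(&attr, 0, -1, -1, 0);
	}

	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	printf("counter opened, sample_id_all %savailable\n",
	       attr.sample_id_all ? "" : "not ");
	close(fd);
	return 0;
}
----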
@@ -326,7 +359,7 @@ try_again:
                                goto try_again;
                        }
                        printf("\n");
-                       error("perfcounter syscall returned with %d (%s)\n",
+                       error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                                        fd[nr_cpu][counter][thread_index], strerror(err));
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -406,6 +439,9 @@ try_again:
                        }
                }
        }
+
+       if (!sample_type)
+               sample_type = attr->sample_type;
 }
 
 static void open_counters(int cpu)
@@ -437,7 +473,8 @@ static void atexit_header(void)
        if (!pipe_output) {
                session->header.data_size += bytes_written;
 
-               process_buildids();
+               if (!no_buildid)
+                       process_buildids();
                perf_header__write(&session->header, output, true);
                perf_session__delete(session);
                symbol__exit();
@@ -552,12 +589,15 @@ static int __cmd_record(int argc, const char **argv)
        }
 
        session = perf_session__new(output_name, O_WRONLY,
-                                   write_mode == WRITE_FORCE, false);
+                                   write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }
 
+       if (!no_buildid)
+               perf_header__set_feat(&session->header, HEADER_BUILD_ID);
+
        if (!file_new) {
                err = perf_header__read(session, output);
                if (err < 0)
@@ -639,6 +679,8 @@ static int __cmd_record(int argc, const char **argv)
                        open_counters(cpumap[i]);
        }
 
+       perf_session__set_sample_type(session, sample_type);
+
        if (pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
@@ -651,6 +693,8 @@ static int __cmd_record(int argc, const char **argv)
 
        post_processing_offset = lseek(output, 0, SEEK_CUR);
 
+       perf_session__set_sample_id_all(session, sample_id_all_avail);
+
        if (pipe_output) {
                err = event__synthesize_attrs(&session->header,
                                              process_synthesized_event,
@@ -831,10 +875,13 @@ const struct option record_options[] = {
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &sample_address,
                    "Sample addresses"),
+       OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
        OPT_BOOLEAN('n', "no-samples", &no_samples,
                    "don't sample"),
-       OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
+       OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
                    "do not update the buildid cache"),
+       OPT_BOOLEAN('B', "no-buildid", &no_buildid,
+                   "do not collect buildids in perf.data"),
        OPT_END()
 };
 
@@ -859,7 +906,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
        }
 
        symbol__init();
-       if (no_buildid)
+
+       if (no_buildid_cache || no_buildid)
                disable_buildid_cache();
 
        if (!nr_counters) {
index 5de405d452300318541338293563d8ebc41ccb87..75183a4518e60d23db05e36e585fc178df731a19 100644 (file)
@@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session,
        return 0;
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data = { .period = 1, };
        struct addr_location al;
        struct perf_event_attr *attr;
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                fprintf(stderr, "problem processing %d event, skipping it.\n",
                        event->header.type);
                return -1;
@@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
        if (al.filtered || (hide_unresolved && al.sym == NULL))
                return 0;
 
-       if (perf_session__add_hist_entry(session, &al, &data)) {
+       if (perf_session__add_hist_entry(session, &al, sample)) {
                pr_debug("problem incrementing symbol period, skipping event\n");
                return -1;
        }
 
-       attr = perf_header__find_attr(data.id, &session->header);
+       attr = perf_header__find_attr(sample->id, &session->header);
 
-       if (add_event_total(session, &data, attr)) {
+       if (add_event_total(session, sample, attr)) {
                pr_debug("problem adding event period\n");
                return -1;
        }
@@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
        return 0;
 }
 
-static int process_read_event(event_t *event, struct perf_session *session __used)
+static int process_read_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        struct perf_event_attr *attr;
 
@@ -243,6 +244,8 @@ static struct perf_event_ops event_ops = {
        .event_type = event__process_event_type,
        .tracing_data = event__process_tracing_data,
        .build_id = event__process_build_id,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 extern volatile int session_done;
@@ -307,7 +310,7 @@ static int __cmd_report(void)
 
        signal(SIGINT, sig_handler);
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -442,6 +445,8 @@ static const struct option options[] = {
                    "dump raw trace in ASCII"),
        OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
+       OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+                  "file", "kallsyms pathname"),
        OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
        OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
                    "load module symbols - WARNING: use only with -k and LIVE kernel"),
@@ -478,6 +483,8 @@ static const struct option options[] = {
                   "columns '.' is reserved."),
        OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
                    "Only display entries resolved to a symbol"),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
 
index 55f3b5dcc731417198a2e5fd29ac8eefd96e1a5e..7a4ebeb8b016b4ca01c14f393b3c5fab98567523 100644 (file)
@@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
                process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
        struct thread *thread;
 
        if (!(session->sample_type & PERF_SAMPLE_RAW))
                return 0;
 
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = -1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, data.pid);
+       thread = perf_session__findnew(session, sample->pid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
@@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
        dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-       if (profile_cpu != -1 && profile_cpu != (int)data.cpu)
+       if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
                return 0;
 
-       process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread);
+       process_raw_event(event, session, sample->raw_data, sample->cpu,
+                         sample->time, thread);
 
        return 0;
 }
@@ -1652,7 +1643,8 @@ static struct perf_event_ops event_ops = {
 static int read_events(void)
 {
        int err = -EINVAL;
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -1869,6 +1861,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
@@ -1888,10 +1883,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
                usage_with_options(sched_usage, sched_options);
 
        /*
-        * Aliased to 'perf trace' for now:
+        * Aliased to 'perf script' for now:
         */
-       if (!strcmp(argv[0], "trace"))
-               return cmd_trace(argc, argv, prefix);
+       if (!strcmp(argv[0], "script"))
+               return cmd_script(argc, argv, prefix);
 
        symbol__init();
        if (!strncmp(argv[0], "rec", 3)) {
similarity index 83%
rename from tools/perf/builtin-trace.c
rename to tools/perf/builtin-script.c
index 86cfe3800e6bf5580718fd3df3c3788f9edbda31..43480fd66db7ebed659efe27d0415e8f68882330 100644 (file)
@@ -56,29 +56,18 @@ static void setup_scripting(void)
 
 static int cleanup_scripting(void)
 {
-       pr_debug("\nperf trace script stopped\n");
+       pr_debug("\nperf script stopped\n");
 
        return scripting_ops->stop_script();
 }
 
 static char const              *input_name = "perf.data";
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
-       struct thread *thread;
+       struct thread *thread = perf_session__findnew(session, event->ip.pid);
 
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = 1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, event->ip.pid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
@@ -87,13 +76,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
        if (session->sample_type & PERF_SAMPLE_RAW) {
                if (debug_mode) {
-                       if (data.time < last_timestamp) {
+                       if (sample->time < last_timestamp) {
                                pr_err("Samples misordered, previous: %llu "
                                        "this: %llu\n", last_timestamp,
-                                       data.time);
+                                       sample->time);
                                nr_unordered++;
                        }
-                       last_timestamp = data.time;
+                       last_timestamp = sample->time;
                        return 0;
                }
                /*
@@ -101,18 +90,19 @@ static int process_sample_event(event_t *event, struct perf_session *session)
                 * field, although it should be the same as this perf
                 * event pid
                 */
-               scripting_ops->process_event(data.cpu, data.raw_data,
-                                            data.raw_size,
-                                            data.time, thread->comm);
+               scripting_ops->process_event(sample->cpu, sample->raw_data,
+                                            sample->raw_size,
+                                            sample->time, thread->comm);
        }
 
-       session->hists.stats.total_period += data.period;
+       session->hists.stats.total_period += sample->period;
        return 0;
 }
 
 static u64 nr_lost;
 
-static int process_lost_event(event_t *event, struct perf_session *session __used)
+static int process_lost_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        nr_lost += event->lost.lost;
 
@@ -127,6 +117,7 @@ static struct perf_event_ops event_ops = {
        .tracing_data = event__process_tracing_data,
        .build_id = event__process_build_id,
        .lost = process_lost_event,
+       .ordering_requires_timestamps = true,
        .ordered_samples = true,
 };
 
@@ -137,7 +128,7 @@ static void sig_handler(int sig __unused)
        session_done = 1;
 }
 
-static int __cmd_trace(struct perf_session *session)
+static int __cmd_script(struct perf_session *session)
 {
        int ret;
 
@@ -247,7 +238,7 @@ static void list_available_languages(void)
 
        fprintf(stderr, "\n");
        fprintf(stderr, "Scripting language extensions (used in "
-               "perf trace -s [spec:]script.[spec]):\n\n");
+               "perf script -s [spec:]script.[spec]):\n\n");
 
        list_for_each_entry(s, &script_specs, node)
                fprintf(stderr, "  %-42s [%s]\n", s->spec, s->ops->name);
@@ -301,17 +292,34 @@ static int parse_scriptname(const struct option *opt __used,
        return 0;
 }
 
-#define for_each_lang(scripts_dir, lang_dirent, lang_next)             \
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+static int is_directory(const char *base_path, const struct dirent *dent)
+{
+       char path[PATH_MAX];
+       struct stat st;
+
+       sprintf(path, "%s/%s", base_path, dent->d_name);
+       if (stat(path, &st))
+               return 0;
+
+       return S_ISDIR(st.st_mode);
+}
+
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
        while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) &&     \
               lang_next)                                               \
-               if (lang_dirent.d_type == DT_DIR &&                     \
+               if ((lang_dirent.d_type == DT_DIR ||                    \
+                    (lang_dirent.d_type == DT_UNKNOWN &&               \
+                     is_directory(scripts_path, &lang_dirent))) &&     \
                    (strcmp(lang_dirent.d_name, ".")) &&                \
                    (strcmp(lang_dirent.d_name, "..")))
 
-#define for_each_script(lang_dir, script_dirent, script_next)          \
+#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
        while (!readdir_r(lang_dir, &script_dirent, &script_next) &&    \
               script_next)                                             \
-               if (script_dirent.d_type != DT_DIR)
+               if (script_dirent.d_type != DT_DIR &&                   \
+                   (script_dirent.d_type != DT_UNKNOWN ||              \
+                    !is_directory(lang_path, &script_dirent)))
 
 
 #define RECORD_SUFFIX                  "-record"
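The DT_UNKNOWN clauses added to the two macros above exist because some filesystems never fill in dirent.d_type, so the only reliable answer comes from stat() -- which is exactly what the new is_directory() helper does. The same pattern written as a plain loop, for readers who find the macro form hard to scan; the names dent_is_dir() and list_subdirs() are invented for this sketch, and it assumes a glibc-style struct dirent with a d_type field:

#include <dirent.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>

/* Treat DT_UNKNOWN as "ask stat()", mirroring is_directory() above. */
static int dent_is_dir(const char *base, const struct dirent *dent)
{
	char path[PATH_MAX];
	struct stat st;

	if (dent->d_type == DT_DIR)
		return 1;
	if (dent->d_type != DT_UNKNOWN)
		return 0;
	snprintf(path, sizeof(path), "%s/%s", base, dent->d_name);
	return stat(path, &st) == 0 && S_ISDIR(st.st_mode);
}

static void list_subdirs(const char *base)
{
	DIR *dir = opendir(base);
	struct dirent *d;

	if (!dir)
		return;
	while ((d = readdir(dir)) != NULL) {
		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
			continue;
		if (dent_is_dir(base, d))
			printf("%s\n", d->d_name);
	}
	closedir(dir);
}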
@@ -380,10 +388,10 @@ out_delete_desc:
        return NULL;
 }
 
-static char *ends_with(char *str, const char *suffix)
+static const char *ends_with(const char *str, const char *suffix)
 {
        size_t suffix_len = strlen(suffix);
-       char *p = str;
+       const char *p = str;
 
        if (strlen(str) > suffix_len) {
                p = str + strlen(str) - suffix_len;
@@ -466,16 +474,16 @@ static int list_available_scripts(const struct option *opt __used,
        if (!scripts_dir)
                return -1;
 
-       for_each_lang(scripts_dir, lang_dirent, lang_next) {
+       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
                         lang_dirent.d_name);
                lang_dir = opendir(lang_path);
                if (!lang_dir)
                        continue;
 
-               for_each_script(lang_dir, script_dirent, script_next) {
+               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
                        script_root = strdup(script_dirent.d_name);
-                       str = ends_with(script_root, REPORT_SUFFIX);
+                       str = (char *)ends_with(script_root, REPORT_SUFFIX);
                        if (str) {
                                *str = '\0';
                                desc = script_desc__findnew(script_root);
@@ -514,16 +522,16 @@ static char *get_script_path(const char *script_root, const char *suffix)
        if (!scripts_dir)
                return NULL;
 
-       for_each_lang(scripts_dir, lang_dirent, lang_next) {
+       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
                         lang_dirent.d_name);
                lang_dir = opendir(lang_path);
                if (!lang_dir)
                        continue;
 
-               for_each_script(lang_dir, script_dirent, script_next) {
+               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
                        __script_root = strdup(script_dirent.d_name);
-                       str = ends_with(__script_root, suffix);
+                       str = (char *)ends_with(__script_root, suffix);
                        if (str) {
                                *str = '\0';
                                if (strcmp(__script_root, script_root))
@@ -543,7 +551,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
 
 static bool is_top_script(const char *script_path)
 {
-       return ends_with((char *)script_path, "top") == NULL ? false : true;
+       return ends_with(script_path, "top") == NULL ? false : true;
 }
 
 static int has_required_arg(char *script_path)
@@ -569,12 +577,12 @@ out:
        return n_args;
 }
 
-static const char * const trace_usage[] = {
-       "perf trace [<options>]",
-       "perf trace [<options>] record <script> [<record-options>] <command>",
-       "perf trace [<options>] report <script> [script-args]",
-       "perf trace [<options>] <script> [<record-options>] <command>",
-       "perf trace [<options>] <top-script> [script-args]",
+static const char * const script_usage[] = {
+       "perf script [<options>]",
+       "perf script [<options>] record <script> [<record-options>] <command>",
+       "perf script [<options>] report <script> [script-args]",
+       "perf script [<options>] <script> [<record-options>] <command>",
+       "perf script [<options>] <top-script> [script-args]",
        NULL
 };
 
@@ -591,7 +599,7 @@ static const struct option options[] = {
                     "script file name (lang:script name, script name, or *)",
                     parse_scriptname),
        OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
-                  "generate perf-trace.xx script in specified language"),
+                  "generate perf-script.xx script in specified language"),
        OPT_STRING('i', "input", &input_name, "file",
                    "input file name"),
        OPT_BOOLEAN('d', "debug-mode", &debug_mode,
@@ -614,7 +622,7 @@ static bool have_cmd(int argc, const char **argv)
        return argc != 0;
 }
 
-int cmd_trace(int argc, const char **argv, const char *prefix __used)
+int cmd_script(int argc, const char **argv, const char *prefix __used)
 {
        char *rec_script_path = NULL;
        char *rep_script_path = NULL;
@@ -626,7 +634,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 
        setup_scripting();
 
-       argc = parse_options(argc, argv, options, trace_usage,
+       argc = parse_options(argc, argv, options, script_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
 
        if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
@@ -640,7 +648,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                if (!rep_script_path) {
                        fprintf(stderr,
                                "Please specify a valid report script"
-                               "(see 'perf trace -l' for listing)\n");
+                               "(see 'perf script -l' for listing)\n");
                        return -1;
                }
        }
@@ -658,8 +666,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 
                if (!rec_script_path && !rep_script_path) {
                        fprintf(stderr, " Couldn't find script %s\n\n See perf"
-                               " trace -l for available scripts.\n", argv[0]);
-                       usage_with_options(trace_usage, options);
+                               " script -l for available scripts.\n", argv[0]);
+                       usage_with_options(script_usage, options);
                }
 
                if (is_top_script(argv[0])) {
@@ -671,9 +679,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                        rec_args = (argc - 1) - rep_args;
                        if (rec_args < 0) {
                                fprintf(stderr, " %s script requires options."
-                                       "\n\n See perf trace -l for available "
+                                       "\n\n See perf script -l for available "
                                        "scripts and options.\n", argv[0]);
-                               usage_with_options(trace_usage, options);
+                               usage_with_options(script_usage, options);
                        }
                }
 
@@ -772,7 +780,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
        if (!script_name)
                setup_pager();
 
-       session = perf_session__new(input_name, O_RDONLY, 0, false);
+       session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -806,7 +814,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                        return -1;
                }
 
-               err = scripting_ops->generate_script("perf-trace");
+               err = scripting_ops->generate_script("perf-script");
                goto out;
        }
 
@@ -814,10 +822,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                err = scripting_ops->start_script(script_name, argc, argv);
                if (err)
                        goto out;
-               pr_debug("perf trace started with script %s\n\n", script_name);
+               pr_debug("perf script started with script %s\n\n", script_name);
        }
 
-       err = __cmd_trace(session);
+       err = __cmd_script(session);
 
        perf_session__delete(session);
        cleanup_scripting();
index a6b4d44f950246e27d4cb6b0bc3e6d5afd27adcc..7ff746da7e6c2d4810ad0ece5e54551505acf023 100644 (file)
@@ -52,6 +52,8 @@
 #include <math.h>
 #include <locale.h>
 
+#define DEFAULT_SEPARATOR      " "
+
 static struct perf_event_attr default_attrs[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
@@ -75,20 +77,30 @@ static int                  run_idx                         =  0;
 static int                     run_count                       =  1;
 static bool                    no_inherit                      = false;
 static bool                    scale                           =  true;
+static bool                    no_aggr                         = false;
 static pid_t                   target_pid                      = -1;
 static pid_t                   target_tid                      = -1;
 static pid_t                   *all_tids                       =  NULL;
 static int                     thread_num                      =  0;
 static pid_t                   child_pid                       = -1;
 static bool                    null_run                        =  false;
-static bool                    big_num                         =  false;
+static bool                    big_num                         =  true;
+static int                     big_num_opt                     =  -1;
 static const char              *cpu_list;
+static const char              *csv_sep                        = NULL;
+static bool                    csv_output                      = false;
 
 
 static int                     *fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static int                     event_scaled[MAX_COUNTERS];
 
+static struct {
+       u64 val;
+       u64 ena;
+       u64 run;
+} cpu_counts[MAX_NR_CPUS][MAX_COUNTERS];
+
 static volatile int done = 0;
 
 struct stats
@@ -136,19 +148,19 @@ static double stddev_stats(struct stats *stats)
 }
 
 struct stats                   event_res_stats[MAX_COUNTERS][3];
-struct stats                   runtime_nsecs_stats;
+struct stats                   runtime_nsecs_stats[MAX_NR_CPUS];
+struct stats                   runtime_cycles_stats[MAX_NR_CPUS];
+struct stats                   runtime_branches_stats[MAX_NR_CPUS];
 struct stats                   walltime_nsecs_stats;
-struct stats                   runtime_cycles_stats;
-struct stats                   runtime_branches_stats;
 
 #define MATCH_EVENT(t, c, counter)                     \
        (attrs[counter].type == PERF_TYPE_##t &&        \
         attrs[counter].config == PERF_COUNT_##c)
 
 #define ERR_PERF_OPEN \
-"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
+"counter %d, sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information."
 
-static int create_perf_stat_counter(int counter)
+static int create_perf_stat_counter(int counter, bool *perm_err)
 {
        struct perf_event_attr *attr = attrs + counter;
        int thread;
@@ -164,11 +176,14 @@ static int create_perf_stat_counter(int counter)
                for (cpu = 0; cpu < nr_cpus; cpu++) {
                        fd[cpu][counter][0] = sys_perf_event_open(attr,
                                        -1, cpumap[cpu], -1, 0);
-                       if (fd[cpu][counter][0] < 0)
-                               pr_debug(ERR_PERF_OPEN, counter,
+                       if (fd[cpu][counter][0] < 0) {
+                               if (errno == EPERM || errno == EACCES)
+                                       *perm_err = true;
+                               error(ERR_PERF_OPEN, counter,
                                         fd[cpu][counter][0], strerror(errno));
-                       else
+                       } else {
                                ++ncreated;
+                       }
                }
        } else {
                attr->inherit = !no_inherit;
@@ -179,12 +194,15 @@ static int create_perf_stat_counter(int counter)
                for (thread = 0; thread < thread_num; thread++) {
                        fd[0][counter][thread] = sys_perf_event_open(attr,
                                all_tids[thread], -1, -1, 0);
-                       if (fd[0][counter][thread] < 0)
-                               pr_debug(ERR_PERF_OPEN, counter,
+                       if (fd[0][counter][thread] < 0) {
+                               if (errno == EPERM || errno == EACCES)
+                                       *perm_err = true;
+                               error(ERR_PERF_OPEN, counter,
                                         fd[0][counter][thread],
                                         strerror(errno));
-                       else
+                       } else {
                                ++ncreated;
+                       }
                }
        }
 
@@ -205,8 +223,9 @@ static inline int nsec_counter(int counter)
 
 /*
  * Read out the results of a single counter:
+ * aggregate counts across CPUs in system-wide mode
  */
-static void read_counter(int counter)
+static void read_counter_aggr(int counter)
 {
        u64 count[3], single_count[3];
        int cpu;
@@ -264,11 +283,58 @@ static void read_counter(int counter)
         * Save the full runtime - to allow normalization during printout:
         */
        if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
-               update_stats(&runtime_nsecs_stats, count[0]);
+               update_stats(&runtime_nsecs_stats[0], count[0]);
        if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
-               update_stats(&runtime_cycles_stats, count[0]);
+               update_stats(&runtime_cycles_stats[0], count[0]);
        if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
-               update_stats(&runtime_branches_stats, count[0]);
+               update_stats(&runtime_branches_stats[0], count[0]);
+}
+
+/*
+ * Read out the results of a single counter:
+ * do not aggregate counts across CPUs in system-wide mode
+ */
+static void read_counter(int counter)
+{
+       u64 count[3];
+       int cpu;
+       size_t res, nv;
+
+       count[0] = count[1] = count[2] = 0;
+
+       nv = scale ? 3 : 1;
+
+       for (cpu = 0; cpu < nr_cpus; cpu++) {
+
+               if (fd[cpu][counter][0] < 0)
+                       continue;
+
+               res = read(fd[cpu][counter][0], count, nv * sizeof(u64));
+
+               assert(res == nv * sizeof(u64));
+
+               close(fd[cpu][counter][0]);
+               fd[cpu][counter][0] = -1;
+
+               if (scale) {
+                       if (count[2] == 0) {
+                               count[0] = 0;
+                       } else if (count[2] < count[1]) {
+                               count[0] = (unsigned long long)
+                               ((double)count[0] * count[1] / count[2] + 0.5);
+                       }
+               }
+               cpu_counts[cpu][counter].val = count[0]; /* scaled count */
+               cpu_counts[cpu][counter].ena = count[1];
+               cpu_counts[cpu][counter].run = count[2];
+
+               if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
+                       update_stats(&runtime_nsecs_stats[cpu], count[0]);
+               if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
+                       update_stats(&runtime_cycles_stats[cpu], count[0]);
+               if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
+                       update_stats(&runtime_branches_stats[cpu], count[0]);
+       }
 }
 
 static int run_perf_stat(int argc __used, const char **argv)
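The scale arithmetic in read_counter() above (and in read_counter_aggr() before it) comes from asking the kernel for PERF_FORMAT_TOTAL_TIME_ENABLED and PERF_FORMAT_TOTAL_TIME_RUNNING, so each read() returns {value, time_enabled, time_running}; a counter that was multiplexed off the PMU for part of the run is extrapolated by enabled/running. A standalone sketch of that rounding, with an invented helper name:

#include <stdint.h>

static uint64_t scale_count(uint64_t raw, uint64_t enabled, uint64_t running)
{
	if (running == 0)
		return 0;		/* never scheduled: report nothing */
	if (running >= enabled)
		return raw;		/* ran the whole window: no scaling */
	/* ran for a fraction of the window: extrapolate, round to nearest */
	return (uint64_t)((double)raw * enabled / running + 0.5);
}

For example, a counter that read 1000 while running for half of its enabled window would be reported as roughly 2000, which is the "scaled count" stored in cpu_counts[cpu][counter].val above.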
@@ -277,6 +343,7 @@ static int run_perf_stat(int argc __used, const char **argv)
        int status = 0;
        int counter, ncreated = 0;
        int child_ready_pipe[2], go_pipe[2];
+       bool perm_err = false;
        const bool forks = (argc > 0);
        char buf;
 
@@ -335,12 +402,15 @@ static int run_perf_stat(int argc __used, const char **argv)
        }
 
        for (counter = 0; counter < nr_counters; counter++)
-               ncreated += create_perf_stat_counter(counter);
-
-       if (ncreated == 0) {
-               pr_err("No permission to collect %sstats.\n"
-                      "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n",
-                      system_wide ? "system-wide " : "");
+               ncreated += create_perf_stat_counter(counter, &perm_err);
+
+       if (ncreated < nr_counters) {
+               if (perm_err)
+                       error("You may not have permission to collect %sstats.\n"
+                             "\t Consider tweaking"
+                             " /proc/sys/kernel/perf_event_paranoid or running as root.",
+                             system_wide ? "system-wide " : "");
+               die("Not all events could be opened.\n");
                if (child_pid != -1)
                        kill(child_pid, SIGTERM);
                return -1;
@@ -362,9 +432,13 @@ static int run_perf_stat(int argc __used, const char **argv)
 
        update_stats(&walltime_nsecs_stats, t1 - t0);
 
-       for (counter = 0; counter < nr_counters; counter++)
-               read_counter(counter);
-
+       if (no_aggr) {
+               for (counter = 0; counter < nr_counters; counter++)
+                       read_counter(counter);
+       } else {
+               for (counter = 0; counter < nr_counters; counter++)
+                       read_counter_aggr(counter);
+       }
        return WEXITSTATUS(status);
 }
 
@@ -377,11 +451,21 @@ static void print_noise(int counter, double avg)
                        100 * stddev_stats(&event_res_stats[counter][0]) / avg);
 }
 
-static void nsec_printout(int counter, double avg)
+static void nsec_printout(int cpu, int counter, double avg)
 {
        double msecs = avg / 1e6;
+       char cpustr[16] = { '\0', };
+       const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
+
+       if (no_aggr)
+               sprintf(cpustr, "CPU%*d%s",
+                       csv_output ? 0 : -4,
+                       cpumap[cpu], csv_sep);
 
-       fprintf(stderr, " %18.6f  %-24s", msecs, event_name(counter));
+       fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter));
+
+       if (csv_output)
+               return;
 
        if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
                fprintf(stderr, " # %10.3f CPUs ",
@@ -389,33 +473,49 @@ static void nsec_printout(int counter, double avg)
        }
 }
 
-static void abs_printout(int counter, double avg)
+static void abs_printout(int cpu, int counter, double avg)
 {
        double total, ratio = 0.0;
+       char cpustr[16] = { '\0', };
+       const char *fmt;
+
+       if (csv_output)
+               fmt = "%s%.0f%s%s";
+       else if (big_num)
+               fmt = "%s%'18.0f%s%-24s";
+       else
+               fmt = "%s%18.0f%s%-24s";
 
-       if (big_num)
-               fprintf(stderr, " %'18.0f  %-24s", avg, event_name(counter));
+       if (no_aggr)
+               sprintf(cpustr, "CPU%*d%s",
+                       csv_output ? 0 : -4,
+                       cpumap[cpu], csv_sep);
        else
-               fprintf(stderr, " %18.0f  %-24s", avg, event_name(counter));
+               cpu = 0;
+
+       fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter));
+
+       if (csv_output)
+               return;
 
        if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
-               total = avg_stats(&runtime_cycles_stats);
+               total = avg_stats(&runtime_cycles_stats[cpu]);
 
                if (total)
                        ratio = avg / total;
 
                fprintf(stderr, " # %10.3f IPC  ", ratio);
        } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
-                       runtime_branches_stats.n != 0) {
-               total = avg_stats(&runtime_branches_stats);
+                       runtime_branches_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_branches_stats[cpu]);
 
                if (total)
                        ratio = avg * 100 / total;
 
                fprintf(stderr, " # %10.3f %%    ", ratio);
 
-       } else if (runtime_nsecs_stats.n != 0) {
-               total = avg_stats(&runtime_nsecs_stats);
+       } else if (runtime_nsecs_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_nsecs_stats[cpu]);
 
                if (total)
                        ratio = 1000.0 * avg / total;
@@ -426,22 +526,29 @@ static void abs_printout(int counter, double avg)
 
 /*
  * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
  */
-static void print_counter(int counter)
+static void print_counter_aggr(int counter)
 {
        double avg = avg_stats(&event_res_stats[counter][0]);
        int scaled = event_scaled[counter];
 
        if (scaled == -1) {
-               fprintf(stderr, " %18s  %-24s\n",
-                       "<not counted>", event_name(counter));
+               fprintf(stderr, "%*s%s%-24s\n",
+                       csv_output ? 0 : 18,
+                       "<not counted>", csv_sep, event_name(counter));
                return;
        }
 
        if (nsec_counter(counter))
-               nsec_printout(counter, avg);
+               nsec_printout(-1, counter, avg);
        else
-               abs_printout(counter, avg);
+               abs_printout(-1, counter, avg);
+
+       if (csv_output) {
+               fputc('\n', stderr);
+               return;
+       }
 
        print_noise(counter, avg);
 
@@ -458,40 +565,91 @@ static void print_counter(int counter)
        fprintf(stderr, "\n");
 }
 
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated count in system-wide
+ */
+static void print_counter(int counter)
+{
+       u64 ena, run, val;
+       int cpu;
+
+       for (cpu = 0; cpu < nr_cpus; cpu++) {
+               val = cpu_counts[cpu][counter].val;
+               ena = cpu_counts[cpu][counter].ena;
+               run = cpu_counts[cpu][counter].run;
+               if (run == 0 || ena == 0) {
+                       fprintf(stderr, "CPU%*d%s%*s%s%-24s",
+                               csv_output ? 0 : -4,
+                               cpumap[cpu], csv_sep,
+                               csv_output ? 0 : 18,
+                               "<not counted>", csv_sep,
+                               event_name(counter));
+
+                       fprintf(stderr, "\n");
+                       continue;
+               }
+
+               if (nsec_counter(counter))
+                       nsec_printout(cpu, counter, val);
+               else
+                       abs_printout(cpu, counter, val);
+
+               if (!csv_output) {
+                       print_noise(counter, 1.0);
+
+                       if (run != ena) {
+                               fprintf(stderr, "  (scaled from %.2f%%)",
+                                       100.0 * run / ena);
+                       }
+               }
+               fprintf(stderr, "\n");
+       }
+}
+
 static void print_stat(int argc, const char **argv)
 {
        int i, counter;
 
        fflush(stdout);
 
-       fprintf(stderr, "\n");
-       fprintf(stderr, " Performance counter stats for ");
-       if(target_pid == -1 && target_tid == -1) {
-               fprintf(stderr, "\'%s", argv[0]);
-               for (i = 1; i < argc; i++)
-                       fprintf(stderr, " %s", argv[i]);
-       } else if (target_pid != -1)
-               fprintf(stderr, "process id \'%d", target_pid);
-       else
-               fprintf(stderr, "thread id \'%d", target_tid);
-
-       fprintf(stderr, "\'");
-       if (run_count > 1)
-               fprintf(stderr, " (%d runs)", run_count);
-       fprintf(stderr, ":\n\n");
+       if (!csv_output) {
+               fprintf(stderr, "\n");
+               fprintf(stderr, " Performance counter stats for ");
+               if(target_pid == -1 && target_tid == -1) {
+                       fprintf(stderr, "\'%s", argv[0]);
+                       for (i = 1; i < argc; i++)
+                               fprintf(stderr, " %s", argv[i]);
+               } else if (target_pid != -1)
+                       fprintf(stderr, "process id \'%d", target_pid);
+               else
+                       fprintf(stderr, "thread id \'%d", target_tid);
+
+               fprintf(stderr, "\'");
+               if (run_count > 1)
+                       fprintf(stderr, " (%d runs)", run_count);
+               fprintf(stderr, ":\n\n");
+       }
 
-       for (counter = 0; counter < nr_counters; counter++)
-               print_counter(counter);
+       if (no_aggr) {
+               for (counter = 0; counter < nr_counters; counter++)
+                       print_counter(counter);
+       } else {
+               for (counter = 0; counter < nr_counters; counter++)
+                       print_counter_aggr(counter);
+       }
 
-       fprintf(stderr, "\n");
-       fprintf(stderr, " %18.9f  seconds time elapsed",
-                       avg_stats(&walltime_nsecs_stats)/1e9);
-       if (run_count > 1) {
-               fprintf(stderr, "   ( +- %7.3f%% )",
+       if (!csv_output) {
+               fprintf(stderr, "\n");
+               fprintf(stderr, " %18.9f  seconds time elapsed",
+                               avg_stats(&walltime_nsecs_stats)/1e9);
+               if (run_count > 1) {
+                       fprintf(stderr, "   ( +- %7.3f%% )",
                                100*stddev_stats(&walltime_nsecs_stats) /
                                avg_stats(&walltime_nsecs_stats));
+               }
+               fprintf(stderr, "\n\n");
        }
-       fprintf(stderr, "\n\n");
 }
 
 static volatile int signr = -1;
@@ -521,6 +679,13 @@ static const char * const stat_usage[] = {
        NULL
 };
 
+static int stat__set_big_num(const struct option *opt __used,
+                            const char *s __used, int unset)
+{
+       big_num_opt = unset ? 0 : 1;
+       return 0;
+}
+
 static const struct option options[] = {
        OPT_CALLBACK('e', "event", NULL, "event",
                     "event selector. use 'perf list' to list available events",
@@ -541,10 +706,15 @@ static const struct option options[] = {
                    "repeat command and print average + stddev (max: 100)"),
        OPT_BOOLEAN('n', "null", &null_run,
                    "null run - dont start any counters"),
-       OPT_BOOLEAN('B', "big-num", &big_num,
-                   "print large numbers with thousands\' separators"),
+       OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
+                          "print large numbers with thousands\' separators",
+                          stat__set_big_num),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
+       OPT_BOOLEAN('A', "no-aggr", &no_aggr,
+                   "disable CPU count aggregation"),
+       OPT_STRING('x', "field-separator", &csv_sep, "separator",
+                  "print counts with custom separator"),
        OPT_END()
 };
 
@@ -557,11 +727,34 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 
        argc = parse_options(argc, argv, options, stat_usage,
                PARSE_OPT_STOP_AT_NON_OPTION);
+
+       if (csv_sep)
+               csv_output = true;
+       else
+               csv_sep = DEFAULT_SEPARATOR;
+
+       /*
+        * let the spreadsheet do the pretty-printing
+        */
+       if (csv_output) {
+               /* User explicitly passed -B? */
+               if (big_num_opt == 1) {
+                       fprintf(stderr, "-B option not supported with -x\n");
+                       usage_with_options(stat_usage, options);
+               } else /* Nope, so disable big number formatting */
+                       big_num = false;
+       } else if (big_num_opt == 0) /* User passed --no-big-num */
+               big_num = false;
+
        if (!argc && target_pid == -1 && target_tid == -1)
                usage_with_options(stat_usage, options);
        if (run_count <= 0)
                usage_with_options(stat_usage, options);
 
+       /* no_aggr is for system-wide only */
+       if (no_aggr && !system_wide)
+               usage_with_options(stat_usage, options);
+
        /* Set attrs and nr_counters if no event is selected and !null_run */
        if (!null_run && !nr_counters) {
                memcpy(attrs, default_attrs, sizeof(default_attrs));
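Taken together, the option handling above means -x <separator> switches perf stat into machine-readable, separator-delimited output (so an explicit -B is rejected and the locale thousands separators are silently dropped), while -A keeps one count per CPU instead of a summed total and is only accepted together with -a. Something like 'perf stat -a -A -x, sleep 1' would therefore emit one comma-separated line per CPU per event rather than the usual pretty-printed table.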
index 035b9fa063a9453002873c00f654adcd13a99eb2..e0c3f471f22d0961e949044fd3be483e7c59ded0 100644 (file)
@@ -119,10 +119,16 @@ static int test__vmlinux_matches_kallsyms(void)
         * end addresses too.
         */
        for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
-               struct symbol *pair;
+               struct symbol *pair, *first_pair;
+               bool backwards = true;
 
                sym  = rb_entry(nd, struct symbol, rb_node);
-               pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+
+               if (sym->start == sym->end)
+                       continue;
+
+               first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+               pair = first_pair;
 
                if (pair && pair->start == sym->start) {
 next_pair:
@@ -143,8 +149,10 @@ next_pair:
                                pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n",
                                         sym->start, sym->name, sym->end, pair->end);
                        } else {
-                               struct rb_node *nnd = rb_prev(&pair->rb_node);
-
+                               struct rb_node *nnd;
+detour:
+                               nnd = backwards ? rb_prev(&pair->rb_node) :
+                                                 rb_next(&pair->rb_node);
                                if (nnd) {
                                        struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
 
@@ -153,6 +161,13 @@ next_pair:
                                                goto next_pair;
                                        }
                                }
+
+                               if (backwards) {
+                                       backwards = false;
+                                       pair = first_pair;
+                                       goto detour;
+                               }
+
                                pr_debug("%#Lx: diff name v: %s k: %s\n",
                                         sym->start, sym->name, pair->name);
                        }
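The detour logic added above handles kallsyms entries that share a start address: starting from first_pair, the test first walks backwards with rb_prev() through neighbours whose start still matches, re-running the comparison for each, then resets to first_pair and walks forwards with rb_next(), and only reports a name mismatch once both directions are exhausted. Zero-sized symbols (sym->start == sym->end) are skipped before the lookup entirely.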
index 9bcc38f0b706f91ca3701e440c15e3e9c8aa7bd3..d75084bccdb79d4402bd8066212d26e4a4571c74 100644 (file)
@@ -272,19 +272,22 @@ static int cpus_cstate_state[MAX_CPUS];
 static u64 cpus_pstate_start_times[MAX_CPUS];
 static u64 cpus_pstate_state[MAX_CPUS];
 
-static int process_comm_event(event_t *event, struct perf_session *session __used)
+static int process_comm_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_set_comm(event->comm.tid, event->comm.comm);
        return 0;
 }
 
-static int process_fork_event(event_t *event, struct perf_session *session __used)
+static int process_fork_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
        return 0;
 }
 
-static int process_exit_event(event_t *event, struct perf_session *session __used)
+static int process_exit_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_exit(event->fork.pid, event->fork.time);
        return 0;
@@ -470,24 +473,21 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
 }
 
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event __used,
+                               struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
        struct trace_entry *te;
 
-       memset(&data, 0, sizeof(data));
-
-       event__parse_sample(event, session->sample_type, &data);
-
        if (session->sample_type & PERF_SAMPLE_TIME) {
-               if (!first_time || first_time > data.time)
-                       first_time = data.time;
-               if (last_time < data.time)
-                       last_time = data.time;
+               if (!first_time || first_time > sample->time)
+                       first_time = sample->time;
+               if (last_time < sample->time)
+                       last_time = sample->time;
        }
 
-       te = (void *)data.raw_data;
-       if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) {
+       te = (void *)sample->raw_data;
+       if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
                char *event_str;
                struct power_entry *pe;
 
@@ -499,19 +499,19 @@ static int process_sample_event(event_t *event, struct perf_session *session)
                        return 0;
 
                if (strcmp(event_str, "power:power_start") == 0)
-                       c_state_start(pe->cpu_id, data.time, pe->value);
+                       c_state_start(pe->cpu_id, sample->time, pe->value);
 
                if (strcmp(event_str, "power:power_end") == 0)
-                       c_state_end(pe->cpu_id, data.time);
+                       c_state_end(pe->cpu_id, sample->time);
 
                if (strcmp(event_str, "power:power_frequency") == 0)
-                       p_state_change(pe->cpu_id, data.time, pe->value);
+                       p_state_change(pe->cpu_id, sample->time, pe->value);
 
                if (strcmp(event_str, "sched:sched_wakeup") == 0)
-                       sched_wakeup(data.cpu, data.time, data.pid, te);
+                       sched_wakeup(sample->cpu, sample->time, sample->pid, te);
 
                if (strcmp(event_str, "sched:sched_switch") == 0)
-                       sched_switch(data.cpu, data.time, te);
+                       sched_switch(sample->cpu, sample->time, te);
        }
        return 0;
 }
@@ -937,7 +937,8 @@ static struct perf_event_ops event_ops = {
 
 static int __cmd_timechart(void)
 {
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        int ret = -EINVAL;
 
        if (session == NULL)
@@ -989,6 +990,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
@@ -1018,6 +1022,8 @@ static const struct option options[] = {
        OPT_CALLBACK('p', "process", NULL, "process",
                      "process selector. Pass a pid or process name.",
                       parse_process),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
 
index dd625808c2a5332c4f733a59acfb1ee881faae3f..ae15f046c405844835d2de82332aae7c39cd84fe 100644 (file)
@@ -977,12 +977,12 @@ static int symbol_filter(struct map *map, struct symbol *sym)
 }
 
 static void event__process_sample(const event_t *self,
-                                struct perf_session *session, int counter)
+                                 struct sample_data *sample,
+                                 struct perf_session *session, int counter)
 {
        u64 ip = self->ip.ip;
        struct sym_entry *syme;
        struct addr_location al;
-       struct sample_data data;
        struct machine *machine;
        u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
@@ -1025,7 +1025,7 @@ static void event__process_sample(const event_t *self,
        if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
                exact_samples++;
 
-       if (event__preprocess_sample(self, session, &al, &data,
+       if (event__preprocess_sample(self, session, &al, sample,
                                     symbol_filter) < 0 ||
            al.filtered)
                return;
@@ -1105,6 +1105,7 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
        unsigned int head = mmap_read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + page_size;
+       struct sample_data sample;
        int diff;
 
        /*
@@ -1152,10 +1153,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
                        event = &event_copy;
                }
 
+               event__parse_sample(event, self, &sample);
                if (event->header.type == PERF_RECORD_SAMPLE)
-                       event__process_sample(event, self, md->counter);
+                       event__process_sample(event, &sample, self, md->counter);
                else
-                       event__process(event, self);
+                       event__process(event, &sample, self);
                old += size;
        }
 
@@ -1214,7 +1216,9 @@ try_again:
                        int err = errno;
 
                        if (err == EPERM || err == EACCES)
-                               die("No permission - are you root?\n");
+                               die("Permission error - are you root?\n"
+                                       "\t Consider tweaking"
+                                       " /proc/sys/kernel/perf_event_paranoid.\n");
                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
@@ -1231,7 +1235,7 @@ try_again:
                                goto try_again;
                        }
                        printf("\n");
-                       error("perfcounter syscall returned with %d (%s)\n",
+                       error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                                        fd[i][counter][thread_index], strerror(err));
                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                        exit(-1);
@@ -1268,7 +1272,7 @@ static int __cmd_top(void)
         * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
         * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
         */
-       struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false);
+       struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
        if (session == NULL)
                return -ENOMEM;
 
index 921245b28583e448cce49008b2482e0f59193454..c7798c7f24ed737f03a4376485be5a2e14b3aef4 100644 (file)
@@ -27,7 +27,7 @@ extern int cmd_report(int argc, const char **argv, const char *prefix);
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
 extern int cmd_timechart(int argc, const char **argv, const char *prefix);
 extern int cmd_top(int argc, const char **argv, const char *prefix);
-extern int cmd_trace(int argc, const char **argv, const char *prefix);
+extern int cmd_script(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
 extern int cmd_probe(int argc, const char **argv, const char *prefix);
 extern int cmd_kmem(int argc, const char **argv, const char *prefix);
index 949d77fc0b9718d812a8906883b7a89ef99c49f0..16b5088cf8f4bbb2259aee8ad90483cd926a0935 100644 (file)
@@ -16,7 +16,7 @@ perf-report                   mainporcelain common
 perf-stat                      mainporcelain common
 perf-timechart                 mainporcelain common
 perf-top                       mainporcelain common
-perf-trace                     mainporcelain common
+perf-script                    mainporcelain common
 perf-probe                     mainporcelain common
 perf-kmem                      mainporcelain common
 perf-lock                      mainporcelain common
index b253db634f04b7e8ddfddd1cc33bb3ce8343a49a..b041ca67a2cbdee01c87ff92ec21deda82ae1b5c 100644 (file)
@@ -9,8 +9,8 @@ endef
 ifndef NO_DWARF
 define SOURCE_DWARF
 #include <dwarf.h>
-#include <libdw.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
 #ifndef _ELFUTILS_PREREQ
 #error
 #endif
index cdd6c03f1e14c132e550b85e07b22e7621a710d2..595d0f4a7103eb45ccac7b5ad47fd3ebfee8ab0d 100644 (file)
@@ -323,7 +323,7 @@ static void handle_internal_command(int argc, const char **argv)
                { "top",        cmd_top,        0 },
                { "annotate",   cmd_annotate,   0 },
                { "version",    cmd_version,    0 },
-               { "trace",      cmd_trace,      0 },
+               { "script",     cmd_script,     0 },
                { "sched",      cmd_sched,      0 },
                { "probe",      cmd_probe,      0 },
                { "kmem",       cmd_kmem,       0 },
index 01a64ad693f2a7c4ac8600e8d37f9a90ffad1b00..790ceba6ad3f4a4102a1affa81a637ef774d7d43 100644 (file)
@@ -8,7 +8,7 @@
 
 #line 1 "Context.xs"
 /*
- * Context.xs.  XS interfaces for perf trace.
+ * Context.xs.  XS interfaces for perf script.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
index 549cf0467d309eda5be9c0faee7897a5608371b4..c1e2ed1ed34e4e16e3398acd12f3e5723a81ddee 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Context.xs.  XS interfaces for perf trace.
+ * Context.xs.  XS interfaces for perf script.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
@@ -23,7 +23,7 @@
 #include "perl.h"
 #include "XSUB.h"
 #include "../../../perf.h"
-#include "../../../util/trace-event.h"
+#include "../../../util/script-event.h"
 
 MODULE = Perf::Trace::Context          PACKAGE = Perf::Trace::Context
 PROTOTYPES: ENABLE
index 9a970763079144666b9bebb3f6d626bd7009f298..2f0c7f3043ee5d992727b50f11c404d33cfb009e 100644 (file)
@@ -1,7 +1,7 @@
 Perf-Trace-Util version 0.01
 ============================
 
-This module contains utility functions for use with perf trace.
+This module contains utility functions for use with perf script.
 
 Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
 that the core perf support for Perl calls on and should always be
@@ -33,7 +33,7 @@ After you do that:
 
 INSTALLATION
 
-Building perf with perf trace Perl scripting should install this
+Building perf with perf script Perl scripting should install this
 module in the right place.
 
 You should make sure libperl and ExtUtils/Embed.pm are installed first
index 6c7f3659cb1769ca8d40bd19816d54be94dd47ca..4e2f6039ac920f60192a7cee033339afbdf26042 100644 (file)
@@ -34,7 +34,7 @@ Perf::Trace::Context - Perl extension for accessing functions in perf.
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index 9df376a9f62971e355de96b14324c52fa9b469ea..9158458d3eeb118c35357a29af763f533e370247 100644 (file)
@@ -163,7 +163,7 @@ sub dump_symbolic_fields
 __END__
 =head1 NAME
 
-Perf::Trace::Core - Perl extension for perf trace
+Perf::Trace::Core - Perl extension for perf script
 
 =head1 SYNOPSIS
 
@@ -171,7 +171,7 @@ Perf::Trace::Core - Perl extension for perf trace
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index d94b40c8ac857227516b6f408bf5b13a9fea03cd..053500114625515d7745757178274e097c0d6aa2 100644 (file)
@@ -65,7 +65,7 @@ sub clear_term
 __END__
 =head1 NAME
 
-Perf::Trace::Util - Perl extension for perf trace
+Perf::Trace::Util - Perl extension for perf script
 
 =head1 SYNOPSIS
 
@@ -73,7 +73,7 @@ Perf::Trace::Util - Perl extension for perf trace
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index 4028d92dc4ae6602927d3c82c02f3973506d995e..9f83cc1ad8ba253acabff31a32987566976e6d7c 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
index ba25f4d41fb02a1d40303ebcdb8ba8f17ff8470d..77200b3f31003c7bd8c3a137106f461223b67a43 100644 (file)
@@ -7,7 +7,4 @@ if [ $# -lt 1 ] ; then
 fi
 comm=$1
 shift
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
index 641a3f5d085c6148e9437e10704f6953d7d1eca5..a27b9f311f959a626d9e481f39cbb65b060d743f 100644 (file)
@@ -1,6 +1,3 @@
 #!/bin/bash
 # description: system-wide r/w activity
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
index 4918dba77021e676fdfeaf6c7d3e90105979ccc8..83e11ec2e190988c142aab5d4644778b812b9447 100644 (file)
@@ -17,7 +17,4 @@ if [ "$n_args" -gt 0 ] ; then
     interval=$1
     shift
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
index 49052ebcb6326d8aa13ea309994d965ca045f58b..889e8130cca55c7235ae749c83c5a9aedb92d4fd 100644 (file)
@@ -1,6 +1,3 @@
 #!/bin/bash
 # description: system-wide min/max/avg wakeup latency
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
index df0c65f4ca93de35b07bbe9e24f5a966d8b75ea8..6d91411d248caa1a0f6ade3ee39645cfb51b631d 100644 (file)
@@ -1,7 +1,3 @@
 #!/bin/bash
 # description: workqueue stats (ins/exe/create/destroy)
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
-
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
index 4e7dc0a407a5fbf65d0bcfab434e6dfad23f0733..4e7076c2061610044f766dddff136bf785166256 100644 (file)
@@ -1,4 +1,4 @@
-# perf trace event handlers, generated by perf trace -g perl
+# perf script event handlers, generated by perf script -g perl
 # (c) 2009, Tom Zanussi <tzanussi@gmail.com>
 # Licensed under the terms of the GNU GPL License version 2
 
index 2a39097687b9f70dffb2fe83f4477539ff8e98b4..74844ee2be3ef691ce9fd1c7129a158b5080a975 100644 (file)
@@ -18,7 +18,7 @@ use lib "./Perf-Trace-Util/lib";
 use Perf::Trace::Core;
 use Perf::Trace::Util;
 
-my $usage = "perf trace -s rw-by-file.pl <comm>\n";
+my $usage = "perf script -s rw-by-file.pl <comm>\n";
 
 my $for_comm = shift or die $usage;
 
index b84b12699b70ba0731bdbfdd4115d62558990184..a8eaff5119e09fa953626d0ed58ddfabae18bb00 100644 (file)
@@ -10,7 +10,7 @@
 #     workqueue:workqueue_destruction -e workqueue:workqueue_execution
 #     -e workqueue:workqueue_insertion
 #
-#   perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl
+#   perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
 
 use 5.010000;
 use strict;
index 957085dd5d8d1a2ff17ee4bb6ad3c3160f8df508..315067b8f5522ae9cafbef1df506aca814e93012 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Context.c.  Python interfaces for perf trace.
+ * Context.c.  Python interfaces for perf script.
  *
  * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
  *
index aad7525bca1dc5a45ca72cd4be79eec7411a2bb0..de7211e4fa471ac0a0475f15097c0e175fdf4da9 100644 (file)
@@ -1,4 +1,4 @@
-# Core.py - Python extension for perf trace, core functions
+# Core.py - Python extension for perf script, core functions
 #
 # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
 #
index ae9a56e43e05e37981774e9dc9ec3d0ed59f6ba2..fdd92f699055713e2d1fec1c99a61489e5812a64 100644 (file)
@@ -1,4 +1,4 @@
-# SchedGui.py - Python extension for perf trace, basic GUI code for
+# SchedGui.py - Python extension for perf script, basic GUI code for
 #              traces drawing and overview.
 #
 # Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
index 13cc02b5893a7ee0a248b040eddc32cdc675a258..15c8400240fd9029ae34fca077304337d9c75ca6 100644 (file)
@@ -1,4 +1,4 @@
-# Util.py - Python extension for perf trace, miscellaneous utility code
+# Util.py - Python extension for perf script, miscellaneous utility code
 #
 # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
 #
index 03587021463d4ef6c7d25b4d0a852178fded5a86..fda5096d0cbf81a29792819c9648a43f89497d3c 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
index c8268138fb7e3e6d431be07bada51abed3022294..6c44271091abbb977b2a0ef725dea470626b1337 100644 (file)
@@ -1,4 +1,4 @@
 #!/bin/bash
 # description: futex contention measurement
 
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
index 4ad361b31249c03f10d424b842b92b6d9911acee..8f759291da86c07435a62e7fa044f8c75f9c2749 100644 (file)
@@ -2,4 +2,4 @@
 # description: display a process of packet and processing time
 # args: [tx] [rx] [dev=] [debug]
 
-perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
+perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
index df1791f07c24233c638e445d48ff3ab52955889c..68b037a1849b1aeb71ec86d21fb05af4e208fc87 100644 (file)
@@ -1,3 +1,3 @@
 #!/bin/bash
 # description: sched migration overview
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
index 36b409c05e50ac5e6f80f7b82189aaa0d2ba096a..c32db294124da91d2654302c9d8e32673ec2bc4c 100644 (file)
@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
     interval=$1
     shift
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
index 4eb88c9fc83ce7e99e14e8b004f71930b422d89a..16eb8d65c54335e08d1a95e7068e6d0df55be6b5 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
index cb2f9c5cf17e825972870c5c934500672e8bd15e..0f0e9d453bb48a606b3c6522a104bd16499fede8 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
index d9f7893e315c0d5aa064df04a8cc3ac870aebdad..4647a7694cf60a77835f3c80aeb79d54578df69d 100644 (file)
@@ -1,4 +1,4 @@
-# perf trace event handlers, generated by perf trace -g python
+# perf script event handlers, generated by perf script -g python
 # (c) 2010, Tom Zanussi <tzanussi@gmail.com>
 # Licensed under the terms of the GNU GPL License version 2
 #
index acd7848717b35ea7c0c46ae61dc01241673f936d..85805fac41167b9e531c09f51ca21595287e08aa 100644 (file)
@@ -15,7 +15,7 @@ from perf_trace_context import *
 from Core import *
 from Util import *
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
+usage = "perf script -s syscall-counts-by-pid.py [comm|pid]\n";
 
 for_comm = None
 for_pid = None
index b934383c3364e63ed7bd6147bf5509867c65e7e2..74d55ec08aed5ec27867b1d74682a5a0bb320748 100644 (file)
@@ -4,7 +4,7 @@
 #
 # Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
 #
-# perf trace event handlers have been generated by perf trace -g python
+# perf script event handlers have been generated by perf script -g python
 #
 # This software is distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
index 7a6ec2c7d8abe7bf01b660210811829f37eb572d..42c267e292fa36155f5d6b8270fb4b6943d9f6d0 100644 (file)
@@ -17,7 +17,7 @@ from perf_trace_context import *
 from Core import *
 from Util import *
 
-usage = "perf trace -s sctop.py [comm] [interval]\n";
+usage = "perf script -s sctop.py [comm] [interval]\n";
 
 for_comm = None
 default_interval = 3
index d1ee3ec10cf2b911776df81df7a5d4d66a5f5fc1..c64d1c55d745b7437e4e26f77446b87567c67ed1 100644 (file)
@@ -14,7 +14,7 @@ from perf_trace_context import *
 from Core import *
 from Util import syscall_name
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
+usage = "perf script -s syscall-counts-by-pid.py [comm]\n";
 
 for_comm = None
 for_pid = None
index ea183dc82d29e54a005f28648201a2219feac224..b435d3f188e84c421819802cb2efcefd62cff0d2 100644 (file)
@@ -15,7 +15,7 @@ from perf_trace_context import *
 from Core import *
 from Util import syscall_name
 
-usage = "perf trace -s syscall-counts.py [comm]\n";
+usage = "perf script -s syscall-counts.py [comm]\n";
 
 for_comm = None
 
index e437edb72417ba2f12e90b810f8851caef2c24fb..deffb8c960716213124b7fd36f56edf41bf8207d 100644 (file)
@@ -14,7 +14,9 @@
 #include <linux/kernel.h>
 #include "debug.h"
 
-static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
+static int build_id__mark_dso_hit(event_t *event,
+                                 struct sample_data *sample __used,
+                                 struct perf_session *session)
 {
        struct addr_location al;
        u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
        return 0;
 }
 
-static int event__exit_del_thread(event_t *self, struct perf_session *session)
+static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
+                                 struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->fork.tid);
 
index c8d81b00089d6d9d26ca431aea91453a86ef2351..01bbe8ecec3f7eda9088e9e59ad78ea53b7b5ce6 100644 (file)
@@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...)
        return ret;
 }
 
-static int dump_printf_color(const char *fmt, const char *color, ...)
+#ifdef NO_NEWT_SUPPORT
+void ui__warning(const char *format, ...)
 {
        va_list args;
-       int ret = 0;
 
-       if (dump_trace) {
-               va_start(args, color);
-               ret = color_vfprintf(stdout, color, fmt, args);
-               va_end(args);
-       }
-
-       return ret;
+       va_start(args, format);
+       vfprintf(stderr, format, args);
+       va_end(args);
 }
-
+#endif
 
 void trace_event(event_t *event)
 {
@@ -70,29 +66,29 @@ void trace_event(event_t *event)
        if (!dump_trace)
                return;
 
-       dump_printf(".");
-       dump_printf_color("\n. ... raw event: size %d bytes\n", color,
-                         event->header.size);
+       printf(".");
+       color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+                     event->header.size);
 
        for (i = 0; i < event->header.size; i++) {
                if ((i & 15) == 0) {
-                       dump_printf(".");
-                       dump_printf_color("  %04x: ", color, i);
+                       printf(".");
+                       color_fprintf(stdout, color, "  %04x: ", i);
                }
 
-               dump_printf_color(" %02x", color, raw_event[i]);
+               color_fprintf(stdout, color, " %02x", raw_event[i]);
 
                if (((i & 15) == 15) || i == event->header.size-1) {
-                       dump_printf_color("  ", color);
+                       color_fprintf(stdout, color, "  ");
                        for (j = 0; j < 15-(i & 15); j++)
-                               dump_printf_color("   ", color);
+                               color_fprintf(stdout, color, "   ");
                        for (j = i & ~15; j <= i; j++) {
-                               dump_printf_color("%c", color,
-                                               isprint(raw_event[j]) ?
-                                               raw_event[j] : '.');
+                               color_fprintf(stdout, color, "%c",
+                                             isprint(raw_event[j]) ?
+                                             raw_event[j] : '.');
                        }
-                       dump_printf_color("\n", color);
+                       color_fprintf(stdout, color, "\n");
                }
        }
-       dump_printf(".\n");
+       printf(".\n");
 }
index 7b514082bbaff4992c31c590cd6d5b3153149f85..ca35fd66b5dfc8c238f5a4be3cb28ce402cf0bf6 100644 (file)
@@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap);
 #include "ui/progress.h"
 #endif
 
+void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+
 #endif /* __PERF_DEBUG_H */
index dab9e754a28103b1727d6dee29669aaa2d7f89da..2302ec051bb4f1b5171bb543b33284b29bf6f8b6 100644 (file)
@@ -7,7 +7,7 @@
 #include "strlist.h"
 #include "thread.h"
 
-const char *event__name[] = {
+static const char *event__name[] = {
        [0]                      = "TOTAL",
        [PERF_RECORD_MMAP]       = "MMAP",
        [PERF_RECORD_LOST]       = "LOST",
@@ -22,13 +22,31 @@ const char *event__name[] = {
        [PERF_RECORD_HEADER_EVENT_TYPE]  = "EVENT_TYPE",
        [PERF_RECORD_HEADER_TRACING_DATA]        = "TRACING_DATA",
        [PERF_RECORD_HEADER_BUILD_ID]    = "BUILD_ID",
+       [PERF_RECORD_FINISHED_ROUND]     = "FINISHED_ROUND",
 };
 
-static pid_t event__synthesize_comm(pid_t pid, int full,
+const char *event__get_event_name(unsigned int id)
+{
+       if (id >= ARRAY_SIZE(event__name))
+               return "INVALID";
+       if (!event__name[id])
+               return "UNKNOWN";
+       return event__name[id];
+}
+
+static struct sample_data synth_sample = {
+       .pid       = -1,
+       .tid       = -1,
+       .time      = -1,
+       .stream_id = -1,
+       .cpu       = -1,
+       .period    = 1,
+};
+
+static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
                                    event__handler_t process,
                                    struct perf_session *session)
 {
-       event_t ev;
        char filename[PATH_MAX];
        char bf[BUFSIZ];
        FILE *fp;
@@ -49,34 +67,39 @@ out_race:
                return 0;
        }
 
-       memset(&ev.comm, 0, sizeof(ev.comm));
-       while (!ev.comm.comm[0] || !ev.comm.pid) {
-               if (fgets(bf, sizeof(bf), fp) == NULL)
-                       goto out_failure;
+       memset(&event->comm, 0, sizeof(event->comm));
+
+       while (!event->comm.comm[0] || !event->comm.pid) {
+               if (fgets(bf, sizeof(bf), fp) == NULL) {
+                       pr_warning("couldn't get COMM and tgid, malformed %s\n", filename);
+                       goto out;
+               }
 
                if (memcmp(bf, "Name:", 5) == 0) {
                        char *name = bf + 5;
                        while (*name && isspace(*name))
                                ++name;
                        size = strlen(name) - 1;
-                       memcpy(ev.comm.comm, name, size++);
+                       memcpy(event->comm.comm, name, size++);
                } else if (memcmp(bf, "Tgid:", 5) == 0) {
                        char *tgids = bf + 5;
                        while (*tgids && isspace(*tgids))
                                ++tgids;
-                       tgid = ev.comm.pid = atoi(tgids);
+                       tgid = event->comm.pid = atoi(tgids);
                }
        }
 
-       ev.comm.header.type = PERF_RECORD_COMM;
+       event->comm.header.type = PERF_RECORD_COMM;
        size = ALIGN(size, sizeof(u64));
-       ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size);
-
+       memset(event->comm.comm + size, 0, session->id_hdr_size);
+       event->comm.header.size = (sizeof(event->comm) -
+                               (sizeof(event->comm.comm) - size) +
+                               session->id_hdr_size);
        if (!full) {
-               ev.comm.tid = pid;
+               event->comm.tid = pid;
 
-               process(&ev, session);
-               goto out_fclose;
+               process(event, &synth_sample, session);
+               goto out;
        }
 
        snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -91,22 +114,19 @@ out_race:
                if (*end)
                        continue;
 
-               ev.comm.tid = pid;
+               event->comm.tid = pid;
 
-               process(&ev, session);
+               process(event, &synth_sample, session);
        }
-       closedir(tasks);
 
-out_fclose:
+       closedir(tasks);
+out:
        fclose(fp);
-       return tgid;
 
-out_failure:
-       pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
-       return -1;
+       return tgid;
 }
 
-static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
+static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
                                         event__handler_t process,
                                         struct perf_session *session)
 {
@@ -124,29 +144,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
                return -1;
        }
 
+       event->header.type = PERF_RECORD_MMAP;
+       /*
+        * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+        */
+       event->header.misc = PERF_RECORD_MISC_USER;
+
        while (1) {
                char bf[BUFSIZ], *pbf = bf;
-               event_t ev = {
-                       .header = {
-                               .type = PERF_RECORD_MMAP,
-                               /*
-                                * Just like the kernel, see __perf_event_mmap
-                                * in kernel/perf_event.c
-                                */
-                               .misc = PERF_RECORD_MISC_USER,
-                        },
-               };
                int n;
                size_t size;
                if (fgets(bf, sizeof(bf), fp) == NULL)
                        break;
 
                /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               n = hex2u64(pbf, &ev.mmap.start);
+               n = hex2u64(pbf, &event->mmap.start);
                if (n < 0)
                        continue;
                pbf += n + 1;
-               n = hex2u64(pbf, &ev.mmap.len);
+               n = hex2u64(pbf, &event->mmap.len);
                if (n < 0)
                        continue;
                pbf += n + 3;
@@ -161,19 +177,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
                                continue;
 
                        pbf += 3;
-                       n = hex2u64(pbf, &ev.mmap.pgoff);
+                       n = hex2u64(pbf, &event->mmap.pgoff);
 
                        size = strlen(execname);
                        execname[size - 1] = '\0'; /* Remove \n */
-                       memcpy(ev.mmap.filename, execname, size);
+                       memcpy(event->mmap.filename, execname, size);
                        size = ALIGN(size, sizeof(u64));
-                       ev.mmap.len -= ev.mmap.start;
-                       ev.mmap.header.size = (sizeof(ev.mmap) -
-                                              (sizeof(ev.mmap.filename) - size));
-                       ev.mmap.pid = tgid;
-                       ev.mmap.tid = pid;
-
-                       process(&ev, session);
+                       event->mmap.len -= event->mmap.start;
+                       event->mmap.header.size = (sizeof(event->mmap) -
+                                               (sizeof(event->mmap.filename) - size));
+                       memset(event->mmap.filename + size, 0, session->id_hdr_size);
+                       event->mmap.header.size += session->id_hdr_size;
+                       event->mmap.pid = tgid;
+                       event->mmap.tid = pid;
+
+                       process(event, &synth_sample, session);
                }
        }
 
@@ -187,20 +205,27 @@ int event__synthesize_modules(event__handler_t process,
 {
        struct rb_node *nd;
        struct map_groups *kmaps = &machine->kmaps;
-       u16 misc;
+       event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for kernel modules\n");
+               return -1;
+       }
+
+       event->header.type = PERF_RECORD_MMAP;
 
        /*
         * kernel uses 0 for user space maps, see kernel/perf_event.c
         * __perf_event_mmap
         */
        if (machine__is_host(machine))
-               misc = PERF_RECORD_MISC_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
        else
-               misc = PERF_RECORD_MISC_GUEST_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
        for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
             nd; nd = rb_next(nd)) {
-               event_t ev;
                size_t size;
                struct map *pos = rb_entry(nd, struct map, rb_node);
 
@@ -208,39 +233,78 @@ int event__synthesize_modules(event__handler_t process,
                        continue;
 
                size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
-               memset(&ev, 0, sizeof(ev));
-               ev.mmap.header.misc = misc;
-               ev.mmap.header.type = PERF_RECORD_MMAP;
-               ev.mmap.header.size = (sizeof(ev.mmap) -
-                                       (sizeof(ev.mmap.filename) - size));
-               ev.mmap.start = pos->start;
-               ev.mmap.len   = pos->end - pos->start;
-               ev.mmap.pid   = machine->pid;
-
-               memcpy(ev.mmap.filename, pos->dso->long_name,
+               event->mmap.header.type = PERF_RECORD_MMAP;
+               event->mmap.header.size = (sizeof(event->mmap) -
+                                       (sizeof(event->mmap.filename) - size));
+               memset(event->mmap.filename + size, 0, session->id_hdr_size);
+               event->mmap.header.size += session->id_hdr_size;
+               event->mmap.start = pos->start;
+               event->mmap.len   = pos->end - pos->start;
+               event->mmap.pid   = machine->pid;
+
+               memcpy(event->mmap.filename, pos->dso->long_name,
                       pos->dso->long_name_len + 1);
-               process(&ev, session);
+               process(event, &synth_sample, session);
        }
 
+       free(event);
        return 0;
 }
 
-int event__synthesize_thread(pid_t pid, event__handler_t process,
-                            struct perf_session *session)
+static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
+                                     pid_t pid, event__handler_t process,
+                                     struct perf_session *session)
 {
-       pid_t tgid = event__synthesize_comm(pid, 1, process, session);
+       pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
+                                           session);
        if (tgid == -1)
                return -1;
-       return event__synthesize_mmap_events(pid, tgid, process, session);
+       return event__synthesize_mmap_events(mmap_event, pid, tgid,
+                                            process, session);
+}
+
+int event__synthesize_thread(pid_t pid, event__handler_t process,
+                            struct perf_session *session)
+{
+       event_t *comm_event, *mmap_event;
+       int err = -1;
+
+       comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
+
+       err = __event__synthesize_thread(comm_event, mmap_event, pid,
+                                        process, session);
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
 }
 
-void event__synthesize_threads(event__handler_t process,
-                              struct perf_session *session)
+int event__synthesize_threads(event__handler_t process,
+                             struct perf_session *session)
 {
        DIR *proc;
        struct dirent dirent, *next;
+       event_t *comm_event, *mmap_event;
+       int err = -1;
+
+       comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
 
        proc = opendir("/proc");
+       if (proc == NULL)
+               goto out_free_mmap;
 
        while (!readdir_r(proc, &dirent, &next) && next) {
                char *end;
@@ -249,10 +313,18 @@ void event__synthesize_threads(event__handler_t process,
                if (*end) /* only interested in proper numerical dirents */
                        continue;
 
-               event__synthesize_thread(pid, process, session);
+               __event__synthesize_thread(comm_event, mmap_event, pid,
+                                          process, session);
        }
 
        closedir(proc);
+       err = 0;
+out_free_mmap:
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
 }
 
 struct process_symbol_args {
@@ -260,7 +332,8 @@ struct process_symbol_args {
        u64        start;
 };
 
-static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
+static int find_symbol_cb(void *arg, const char *name, char type,
+                         u64 start, u64 end __used)
 {
        struct process_symbol_args *args = arg;
 
@@ -286,18 +359,20 @@ int event__synthesize_kernel_mmap(event__handler_t process,
        char path[PATH_MAX];
        char name_buff[PATH_MAX];
        struct map *map;
-
-       event_t ev = {
-               .header = {
-                       .type = PERF_RECORD_MMAP,
-               },
-       };
+       int err;
        /*
         * We should get this from /sys/kernel/sections/.text, but till that is
         * available use this, and after it is use this as a fallback for older
         * kernels.
         */
        struct process_symbol_args args = { .name = symbol_name, };
+       event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for the kernel\n");
+               return -1;
+       }
 
        mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
        if (machine__is_host(machine)) {
@@ -305,10 +380,10 @@ int event__synthesize_kernel_mmap(event__handler_t process,
                 * kernel uses PERF_RECORD_MISC_USER for user space maps,
                 * see kernel/perf_event.c __perf_event_mmap
                 */
-               ev.header.misc = PERF_RECORD_MISC_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
                filename = "/proc/kallsyms";
        } else {
-               ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
                if (machine__is_default_guest(machine))
                        filename = (char *) symbol_conf.default_guest_kallsyms;
                else {
@@ -321,17 +396,21 @@ int event__synthesize_kernel_mmap(event__handler_t process,
                return -ENOENT;
 
        map = machine->vmlinux_maps[MAP__FUNCTION];
-       size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
+       size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
                        "%s%s", mmap_name, symbol_name) + 1;
        size = ALIGN(size, sizeof(u64));
-       ev.mmap.header.size = (sizeof(ev.mmap) -
-                       (sizeof(ev.mmap.filename) - size));
-       ev.mmap.pgoff = args.start;
-       ev.mmap.start = map->start;
-       ev.mmap.len   = map->end - ev.mmap.start;
-       ev.mmap.pid   = machine->pid;
-
-       return process(&ev, session);
+       event->mmap.header.type = PERF_RECORD_MMAP;
+       event->mmap.header.size = (sizeof(event->mmap) -
+                       (sizeof(event->mmap.filename) - size) + session->id_hdr_size);
+       event->mmap.pgoff = args.start;
+       event->mmap.start = map->start;
+       event->mmap.len   = map->end - event->mmap.start;
+       event->mmap.pid   = machine->pid;
+
+       err = process(event, &synth_sample, session);
+       free(event);
+
+       return err;
 }
 
 static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -361,7 +440,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm,
        return 0;
 }
 
-int event__process_comm(event_t *self, struct perf_session *session)
+int event__process_comm(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->comm.tid);
 
@@ -376,7 +456,8 @@ int event__process_comm(event_t *self, struct perf_session *session)
        return 0;
 }
 
-int event__process_lost(event_t *self, struct perf_session *session)
+int event__process_lost(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
        session->hists.stats.total_lost += self->lost.lost;
@@ -392,7 +473,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
         * a zero sized synthesized MMAP event for the kernel.
         */
        if (maps[MAP__FUNCTION]->end == 0)
-               maps[MAP__FUNCTION]->end = ~0UL;
+               maps[MAP__FUNCTION]->end = ~0ULL;
 }
 
 static int event__process_kernel_mmap(event_t *self,
@@ -485,7 +566,8 @@ out_problem:
        return -1;
 }
 
-int event__process_mmap(event_t *self, struct perf_session *session)
+int event__process_mmap(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct machine *machine;
        struct thread *thread;
@@ -526,7 +608,8 @@ out_problem:
        return 0;
 }
 
-int event__process_task(event_t *self, struct perf_session *session)
+int event__process_task(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->fork.tid);
        struct thread *parent = perf_session__findnew(session, self->fork.ptid);
@@ -548,18 +631,19 @@ int event__process_task(event_t *self, struct perf_session *session)
        return 0;
 }
 
-int event__process(event_t *event, struct perf_session *session)
+int event__process(event_t *event, struct sample_data *sample,
+                  struct perf_session *session)
 {
        switch (event->header.type) {
        case PERF_RECORD_COMM:
-               event__process_comm(event, session);
+               event__process_comm(event, sample, session);
                break;
        case PERF_RECORD_MMAP:
-               event__process_mmap(event, session);
+               event__process_mmap(event, sample, session);
                break;
        case PERF_RECORD_FORK:
        case PERF_RECORD_EXIT:
-               event__process_task(event, session);
+               event__process_task(event, sample, session);
                break;
        default:
                break;
@@ -674,32 +758,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
                             symbol_filter_t filter)
 {
        u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-       struct thread *thread;
-
-       event__parse_sample(self, session->sample_type, data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
-                   self->header.misc, data->pid, data->tid, data->ip,
-                   data->period, data->cpu);
-
-       if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
-               unsigned int i;
-
-               dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
+       struct thread *thread = perf_session__findnew(session, self->ip.pid);
 
-               if (!ip_callchain__valid(data->callchain, self)) {
-                       pr_debug("call-chain problem with event, "
-                                "skipping it.\n");
-                       goto out_filtered;
-               }
-
-               if (dump_trace) {
-                       for (i = 0; i < data->callchain->nr; i++)
-                               dump_printf("..... %2d: %016Lx\n",
-                                           i, data->callchain->ips[i]);
-               }
-       }
-       thread = perf_session__findnew(session, self->ip.pid);
        if (thread == NULL)
                return -1;
 
@@ -766,9 +826,65 @@ out_filtered:
        return 0;
 }
 
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data)
+static int event__parse_id_sample(const event_t *event,
+                                 struct perf_session *session,
+                                 struct sample_data *sample)
 {
-       const u64 *array = event->sample.array;
+       const u64 *array;
+       u64 type;
+
+       sample->cpu = sample->pid = sample->tid = -1;
+       sample->stream_id = sample->id = sample->time = -1ULL;
+
+       if (!session->sample_id_all)
+               return 0;
+
+       array = event->sample.array;
+       array += ((event->header.size -
+                  sizeof(event->header)) / sizeof(u64)) - 1;
+       type = session->sample_type;
+
+       if (type & PERF_SAMPLE_CPU) {
+               u32 *p = (u32 *)array;
+               sample->cpu = *p;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_STREAM_ID) {
+               sample->stream_id = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_ID) {
+               sample->id = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_TIME) {
+               sample->time = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_TID) {
+               u32 *p = (u32 *)array;
+               sample->pid = p[0];
+               sample->tid = p[1];
+       }
+
+       return 0;
+}
+
+int event__parse_sample(const event_t *event, struct perf_session *session,
+                       struct sample_data *data)
+{
+       const u64 *array;
+       u64 type;
+
+       if (event->header.type != PERF_RECORD_SAMPLE)
+               return event__parse_id_sample(event, session, data);
+
+       array = event->sample.array;
+       type = session->sample_type;
 
        if (type & PERF_SAMPLE_IP) {
                data->ip = event->ip.ip;
index 8e790dae702625aa564594bee1dd52618b94f8d8..2b7e91902f105d5962a68254737ce21cfeef935c 100644 (file)
@@ -85,6 +85,7 @@ struct build_id_event {
 };
 
 enum perf_user_event_type { /* above any possible kernel type */
+       PERF_RECORD_USER_TYPE_START             = 64,
        PERF_RECORD_HEADER_ATTR                 = 64,
        PERF_RECORD_HEADER_EVENT_TYPE           = 65,
        PERF_RECORD_HEADER_TRACING_DATA         = 66,
@@ -135,12 +136,15 @@ void event__print_totals(void);
 
 struct perf_session;
 
-typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
+typedef int (*event__handler_synth_t)(event_t *event, 
+                                     struct perf_session *session);
+typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
+                               struct perf_session *session);
 
 int event__synthesize_thread(pid_t pid, event__handler_t process,
                             struct perf_session *session);
-void event__synthesize_threads(event__handler_t process,
-                              struct perf_session *session);
+int event__synthesize_threads(event__handler_t process,
+                             struct perf_session *session);
 int event__synthesize_kernel_mmap(event__handler_t process,
                                struct perf_session *session,
                                struct machine *machine,
@@ -150,18 +154,24 @@ int event__synthesize_modules(event__handler_t process,
                              struct perf_session *session,
                              struct machine *machine);
 
-int event__process_comm(event_t *self, struct perf_session *session);
-int event__process_lost(event_t *self, struct perf_session *session);
-int event__process_mmap(event_t *self, struct perf_session *session);
-int event__process_task(event_t *self, struct perf_session *session);
-int event__process(event_t *event, struct perf_session *session);
+int event__process_comm(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_lost(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_mmap(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_task(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process(event_t *event, struct sample_data *sample,
+                  struct perf_session *session);
 
 struct addr_location;
 int event__preprocess_sample(const event_t *self, struct perf_session *session,
                             struct addr_location *al, struct sample_data *data,
                             symbol_filter_t filter);
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data);
+int event__parse_sample(const event_t *event, struct perf_session *session,
+                       struct sample_data *sample);
 
-extern const char *event__name[];
+const char *event__get_event_name(unsigned int id);
 
 #endif /* __PERF_RECORD_H */
index 7cba0551a56550888c98ff1a933fd6bec8ab9ca0..4b8c8397a94706e0002fe3b1b126fc77d81c1aea 100644 (file)
@@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat)
        set_bit(feat, self->adds_features);
 }
 
+void perf_header__clear_feat(struct perf_header *self, int feat)
+{
+       clear_bit(feat, self->adds_features);
+}
+
 bool perf_header__has_feat(const struct perf_header *self, int feat)
 {
        return test_bit(feat, self->adds_features);
@@ -433,8 +438,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
        int idx = 0, err;
 
        session = container_of(self, struct perf_session, header);
-       if (perf_session__read_build_ids(session, true))
-               perf_header__set_feat(self, HEADER_BUILD_ID);
+
+       if (perf_header__has_feat(self, HEADER_BUILD_ID) &&
+           !perf_session__read_build_ids(session, true))
+               perf_header__clear_feat(self, HEADER_BUILD_ID);
 
        nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
        if (!nr_sections)
@@ -941,6 +948,24 @@ u64 perf_header__sample_type(struct perf_header *header)
        return type;
 }
 
+bool perf_header__sample_id_all(const struct perf_header *header)
+{
+       bool value = false, first = true;
+       int i;
+
+       for (i = 0; i < header->attrs; i++) {
+               struct perf_header_attr *attr = header->attr[i];
+
+               if (first) {
+                       value = attr->attr.sample_id_all;
+                       first = false;
+               } else if (value != attr->attr.sample_id_all)
+                       die("non matching sample_id_all");
+       }
+
+       return value;
+}
+
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header)
 {
@@ -987,21 +1012,23 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
 
        ev = malloc(size);
 
+       if (ev == NULL)
+               return -ENOMEM;
+
        ev->attr.attr = *attr;
        memcpy(ev->attr.id, id, ids * sizeof(u64));
 
        ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
        ev->attr.header.size = size;
 
-       err = process(ev, session);
+       err = process(ev, NULL, session);
 
        free(ev);
 
        return err;
 }
 
-int event__synthesize_attrs(struct perf_header *self,
-                           event__handler_t process,
+int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
                            struct perf_session *session)
 {
        struct perf_header_attr *attr;
@@ -1071,7 +1098,7 @@ int event__synthesize_event_type(u64 event_id, char *name,
        ev.event_type.header.size = sizeof(ev.event_type) -
                (sizeof(ev.event_type.event_type.name) - size);
 
-       err = process(&ev, session);
+       err = process(&ev, NULL, session);
 
        return err;
 }
@@ -1126,7 +1153,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
        ev.tracing_data.header.size = sizeof(ev.tracing_data);
        ev.tracing_data.size = aligned_size;
 
-       process(&ev, session);
+       process(&ev, NULL, session);
 
        err = read_tracing_data(fd, pattrs, nb_events);
        write_padded(fd, NULL, 0, padding);
@@ -1186,7 +1213,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
        ev.build_id.header.size = sizeof(ev.build_id) + len;
        memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
 
-       err = process(&ev, session);
+       err = process(&ev, NULL, session);
 
        return err;
 }
index 402ac2454cf8bcc664c3daf193119f470c77d7c3..6335965e1f93d8df0db8ddaa2c83c434182fb068 100644 (file)
@@ -81,9 +81,11 @@ void perf_header_attr__delete(struct perf_header_attr *self);
 int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
 
 u64 perf_header__sample_type(struct perf_header *header);
+bool perf_header__sample_id_all(const struct perf_header *header);
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header);
 void perf_header__set_feat(struct perf_header *self, int feat);
+void perf_header__clear_feat(struct perf_header *self, int feat);
 bool perf_header__has_feat(const struct perf_header *self, int feat);
 
 int perf_header__process_sections(struct perf_header *self, int fd,
index 2022e87409942ca4b0d133c3f889e41178a663d1..d5036700a4359ba640e6e9c953e7b0861c443382 100644 (file)
@@ -1092,6 +1092,12 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
        FILE *file;
        int err = 0;
        u64 len;
+       char symfs_filename[PATH_MAX];
+
+       if (filename) {
+               snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+                        symbol_conf.symfs, filename);
+       }
 
        if (filename == NULL) {
                if (dso->has_build_id) {
@@ -1100,9 +1106,9 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
                        return -ENOMEM;
                }
                goto fallback;
-       } else if (readlink(filename, command, sizeof(command)) < 0 ||
+       } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
                   strstr(command, "[kernel.kallsyms]") ||
-                  access(filename, R_OK)) {
+                  access(symfs_filename, R_OK)) {
                free(filename);
 fallback:
                /*
@@ -1111,6 +1117,8 @@ fallback:
                 * DSO is the same as when 'perf record' ran.
                 */
                filename = dso->long_name;
+               snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+                        symbol_conf.symfs, filename);
                free_filename = false;
        }
 
@@ -1137,7 +1145,7 @@ fallback:
                 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
                 map__rip_2objdump(map, sym->start),
                 map__rip_2objdump(map, sym->end),
-                filename, filename);
+                symfs_filename, filename);
 
        pr_debug("Executing: %s\n", command);
 
@@ -1168,10 +1176,13 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
        size_t ret = 0;
 
        for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
-               if (!event__name[i])
+               const char *name = event__get_event_name(i);
+
+               if (!strcmp(name, "UNKNOWN"))
                        continue;
-               ret += fprintf(fp, "%10s events: %10d\n",
-                              event__name[i], self->stats.nr_events[i]);
+
+               ret += fprintf(fp, "%16s events: %10d\n", name,
+                              self->stats.nr_events[i]);
        }
 
        return ret;
index 587d375d34300daa09948c1a0d95407b4f8439fa..ee789856a8c94644e189f0dc8a7be7933469a6cb 100644 (file)
@@ -52,8 +52,10 @@ struct sym_priv {
 struct events_stats {
        u64 total_period;
        u64 total_lost;
+       u64 total_invalid_chains;
        u32 nr_events[PERF_RECORD_HEADER_MAX];
        u32 nr_unknown_events;
+       u32 nr_invalid_chains;
 };
 
 enum hist_column {
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644 (file)
index 0000000..acffd5e
--- /dev/null
@@ -0,0 +1,9 @@
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644 (file)
index 0000000..bb4198e
--- /dev/null
@@ -0,0 +1,11 @@
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+
+#endif /* PERF_DWARF2_H */
+
index bb4ac2e053859482f98933b278a8d0adda71aa5a..8be0b968ca0bcfa44c95248e14e3878481887b21 100644 (file)
@@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr)
        addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
 }
 
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+       addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
+}
+
 static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
 {
        return ((1UL << (nr % BITS_PER_LONG)) &
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644 (file)
index 0000000..06387cf
--- /dev/null
@@ -0,0 +1,13 @@
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+#define ENTRY(name)                            \
+       .globl name;                            \
+       name:
+
+#define ENDPROC(name)
+
+#endif /* PERF_LINUX_LINKAGE_H_ */
index 4af5bd59cfd14b475d0f2fa60e15f1b4b4e908de..c305305a3884a9feab92adb6ee28e168321e2d92 100644 (file)
@@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name,
        id = atoll(id_buf);
        attr->config = id;
        attr->type = PERF_TYPE_TRACEPOINT;
-       *strp = evt_name + evt_length;
+       *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
 
        attr->sample_type |= PERF_SAMPLE_RAW;
        attr->sample_type |= PERF_SAMPLE_TIME;
@@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
                                    struct perf_event_attr *attr)
 {
        const char *evt_name;
-       char *flags;
+       char *flags = NULL, *comma_loc;
        char sys_name[MAX_EVENT_LENGTH];
        unsigned int sys_length, evt_length;
 
@@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp,
        sys_name[sys_length] = '\0';
        evt_name = evt_name + 1;
 
+       comma_loc = strchr(evt_name, ',');
+       if (comma_loc) {
+               /* take the event name up to the comma */
+               evt_name = strndup(evt_name, comma_loc - evt_name);
+       }
        flags = strchr(evt_name, ':');
        if (flags) {
                /* split it out: */
@@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp,
        evt_length = strlen(evt_name);
        if (evt_length >= MAX_EVENT_LENGTH)
                return EVT_FAILED;
-
        if (strpbrk(evt_name, "*?")) {
-               *strp = evt_name + evt_length;
+               *strp += strlen(sys_name) + evt_length;
                return parse_multiple_tracepoint_event(sys_name, evt_name,
                                                       flags);
        } else
index c7d72dce54b2cf7c3f46042bd6ce6a68c941b4d6..abc31a1dac1a738c5791512032c5a9337af84b9a 100644 (file)
@@ -119,6 +119,10 @@ struct option {
        { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
 #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
        { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
+#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
+       { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
+       .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
+       .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
 
 /* parse_options() will filter out the processed options and leave the
  * non-option arguments in argv[].
index 61191c6cbe7a8d04af83e1af5426029a73bc7b99..128aaab0aedad86403a0c722211e4d57cb982d8d 100644 (file)
@@ -95,7 +95,7 @@ static int init_vmlinux(void)
                goto out;
 
        if (machine__create_kernel_maps(&machine) < 0) {
-               pr_debug("machine__create_kernel_maps ");
+               pr_debug("machine__create_kernel_maps() failed.\n");
                goto out;
        }
 out:
@@ -149,7 +149,8 @@ static int open_vmlinux(const char *module)
 {
        const char *path = kernel_get_module_path(module);
        if (!path) {
-               pr_err("Failed to find path of %s module", module ?: "kernel");
+               pr_err("Failed to find path of %s module.\n",
+                      module ?: "kernel");
                return -ENOENT;
        }
        pr_debug("Try to open %s\n", path);
@@ -226,7 +227,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                pr_warning("Warning: No dwarf info found in the vmlinux - "
                        "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
                if (!need_dwarf) {
-                       pr_debug("Trying to use symbols.\nn");
+                       pr_debug("Trying to use symbols.\n");
                        return 0;
                }
        }
@@ -295,42 +296,49 @@ static int get_real_path(const char *raw_path, const char *comp_dir,
 #define LINEBUF_SIZE 256
 #define NR_ADDITIONAL_LINES 2
 
-static int show_one_line(FILE *fp, int l, bool skip, bool show_num)
+static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
 {
        char buf[LINEBUF_SIZE];
-       const char *color = PERF_COLOR_BLUE;
+       const char *color = show_num ? "" : PERF_COLOR_BLUE;
+       const char *prefix = NULL;
 
-       if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
-               goto error;
-       if (!skip) {
-               if (show_num)
-                       fprintf(stdout, "%7d  %s", l, buf);
-               else
-                       color_fprintf(stdout, color, "         %s", buf);
-       }
-
-       while (strlen(buf) == LINEBUF_SIZE - 1 &&
-              buf[LINEBUF_SIZE - 2] != '\n') {
+       do {
                if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
                        goto error;
-               if (!skip) {
-                       if (show_num)
-                               fprintf(stdout, "%s", buf);
-                       else
-                               color_fprintf(stdout, color, "%s", buf);
+               if (skip)
+                       continue;
+               if (!prefix) {
+                       prefix = show_num ? "%7d  " : "         ";
+                       color_fprintf(stdout, color, prefix, l);
                }
-       }
+               color_fprintf(stdout, color, "%s", buf);
 
-       return 0;
+       } while (strchr(buf, '\n') == NULL);
+
+       return 1;
 error:
-       if (feof(fp))
-               pr_warning("Source file is shorter than expected.\n");
-       else
+       if (ferror(fp)) {
                pr_warning("File read error: %s\n", strerror(errno));
+               return -1;
+       }
+       return 0;
+}
 
-       return -1;
+static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+       int rv = __show_one_line(fp, l, skip, show_num);
+       if (rv == 0) {
+               pr_warning("Source file is shorter than expected.\n");
+               rv = -1;
+       }
+       return rv;
 }
 
+#define show_one_line_with_num(f,l)    _show_one_line(f,l,false,true)
+#define show_one_line(f,l)             _show_one_line(f,l,false,false)
+#define skip_one_line(f,l)             _show_one_line(f,l,true,false)
+#define show_one_line_or_eof(f,l)      __show_one_line(f,l,false,false)
+
 /*
  * Show line-range always requires debuginfo to find source file and
  * line number.
@@ -379,7 +387,7 @@ int show_line_range(struct line_range *lr, const char *module)
                fprintf(stdout, "<%s:%d>\n", lr->function,
                        lr->start - lr->offset);
        else
-               fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+               fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
 
        fp = fopen(lr->path, "r");
        if (fp == NULL) {
@@ -388,26 +396,30 @@ int show_line_range(struct line_range *lr, const char *module)
                return -errno;
        }
        /* Skip to starting line number */
-       while (l < lr->start && ret >= 0)
-               ret = show_one_line(fp, l++, true, false);
-       if (ret < 0)
-               goto end;
+       while (l < lr->start) {
+               ret = skip_one_line(fp, l++);
+               if (ret < 0)
+                       goto end;
+       }
 
        list_for_each_entry(ln, &lr->line_list, list) {
-               while (ln->line > l && ret >= 0)
-                       ret = show_one_line(fp, (l++) - lr->offset,
-                                           false, false);
-               if (ret >= 0)
-                       ret = show_one_line(fp, (l++) - lr->offset,
-                                           false, true);
+               for (; ln->line > l; l++) {
+                       ret = show_one_line(fp, l - lr->offset);
+                       if (ret < 0)
+                               goto end;
+               }
+               ret = show_one_line_with_num(fp, l++ - lr->offset);
                if (ret < 0)
                        goto end;
        }
 
        if (lr->end == INT_MAX)
                lr->end = l + NR_ADDITIONAL_LINES;
-       while (l <= lr->end && !feof(fp) && ret >= 0)
-               ret = show_one_line(fp, (l++) - lr->offset, false, false);
+       while (l <= lr->end) {
+               ret = show_one_line_or_eof(fp, l++ - lr->offset);
+               if (ret <= 0)
+                       break;
+       }
 end:
        fclose(fp);
        return ret;
@@ -466,7 +478,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
 
        fd = open_vmlinux(module);
        if (fd < 0) {
-               pr_warning("Failed to open debuginfo file.\n");
+               pr_warning("Failed to open debug information file.\n");
                return fd;
        }
 
@@ -526,56 +538,87 @@ int show_available_vars(struct perf_probe_event *pevs __unused,
 }
 #endif
 
+static int parse_line_num(char **ptr, int *val, const char *what)
+{
+       const char *start = *ptr;
+
+       errno = 0;
+       *val = strtol(*ptr, ptr, 0);
+       if (errno || *ptr == start) {
+               semantic_error("'%s' is not a valid number.\n", what);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/*
+ * Stuff 'lr' according to the line range described by 'arg'.
+ * The line range syntax is described by:
+ *
+ *         SRC[:SLN[+NUM|-ELN]]
+ *         FNC[:SLN[+NUM|-ELN]]
+ */
 int parse_line_range_desc(const char *arg, struct line_range *lr)
 {
-       const char *ptr;
-       char *tmp;
-       /*
-        * <Syntax>
-        * SRC:SLN[+NUM|-ELN]
-        * FUNC[:SLN[+NUM|-ELN]]
-        */
-       ptr = strchr(arg, ':');
-       if (ptr) {
-               lr->start = (int)strtoul(ptr + 1, &tmp, 0);
-               if (*tmp == '+') {
-                       lr->end = lr->start + (int)strtoul(tmp + 1, &tmp, 0);
-                       lr->end--;      /*
-                                        * Adjust the number of lines here.
-                                        * If the number of lines == 1, the
-                                        * the end of line should be equal to
-                                        * the start of line.
-                                        */
-               } else if (*tmp == '-')
-                       lr->end = (int)strtoul(tmp + 1, &tmp, 0);
-               else
-                       lr->end = INT_MAX;
+       char *range, *name = strdup(arg);
+       int err;
+
+       if (!name)
+               return -ENOMEM;
+
+       lr->start = 0;
+       lr->end = INT_MAX;
+
+       range = strchr(name, ':');
+       if (range) {
+               *range++ = '\0';
+
+               err = parse_line_num(&range, &lr->start, "start line");
+               if (err)
+                       goto err;
+
+               if (*range == '+' || *range == '-') {
+                       const char c = *range++;
+
+                       err = parse_line_num(&range, &lr->end, "end line");
+                       if (err)
+                               goto err;
+
+                       if (c == '+') {
+                               lr->end += lr->start;
+                               /*
+                                * Adjust the number of lines here.
+                                * If the number of lines == 1, the
+                                * end of line should be equal to
+                                * the start of line.
+                                */
+                               lr->end--;
+                       }
+               }
+
                pr_debug("Line range is %d to %d\n", lr->start, lr->end);
+
+               err = -EINVAL;
                if (lr->start > lr->end) {
                        semantic_error("Start line must be smaller"
                                       " than end line.\n");
-                       return -EINVAL;
+                       goto err;
                }
-               if (*tmp != '\0') {
-                       semantic_error("Tailing with invalid character '%d'.\n",
-                                      *tmp);
-                       return -EINVAL;
+               if (*range != '\0') {
+                       semantic_error("Trailing characters '%s' are invalid.\n", range);
+                       goto err;
                }
-               tmp = strndup(arg, (ptr - arg));
-       } else {
-               tmp = strdup(arg);
-               lr->end = INT_MAX;
        }
 
-       if (tmp == NULL)
-               return -ENOMEM;
-
-       if (strchr(tmp, '.'))
-               lr->file = tmp;
+       if (strchr(name, '.'))
+               lr->file = name;
        else
-               lr->function = tmp;
+               lr->function = name;
 
        return 0;
+err:
+       free(name);
+       return err;
 }
 
 /* Check the name is good for event/group */
@@ -699,39 +742,40 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 
        /* Exclusion check */
        if (pp->lazy_line && pp->line) {
-               semantic_error("Lazy pattern can't be used with line number.");
+               semantic_error("Lazy pattern can't be used with"
+                              " line number.\n");
                return -EINVAL;
        }
 
        if (pp->lazy_line && pp->offset) {
-               semantic_error("Lazy pattern can't be used with offset.");
+               semantic_error("Lazy pattern can't be used with offset.\n");
                return -EINVAL;
        }
 
        if (pp->line && pp->offset) {
-               semantic_error("Offset can't be used with line number.");
+               semantic_error("Offset can't be used with line number.\n");
                return -EINVAL;
        }
 
        if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
                semantic_error("File always requires line number or "
-                              "lazy pattern.");
+                              "lazy pattern.\n");
                return -EINVAL;
        }
 
        if (pp->offset && !pp->function) {
-               semantic_error("Offset requires an entry function.");
+               semantic_error("Offset requires an entry function.\n");
                return -EINVAL;
        }
 
        if (pp->retprobe && !pp->function) {
-               semantic_error("Return probe requires an entry function.");
+               semantic_error("Return probe requires an entry function.\n");
                return -EINVAL;
        }
 
        if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
                semantic_error("Offset/Line/Lazy pattern can't be used with "
-                              "return probe.");
+                              "return probe.\n");
                return -EINVAL;
        }
 
@@ -1005,7 +1049,7 @@ int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
 
        return tmp - buf;
 error:
-       pr_debug("Failed to synthesize perf probe argument: %s",
+       pr_debug("Failed to synthesize perf probe argument: %s\n",
                 strerror(-ret));
        return ret;
 }
@@ -1033,13 +1077,13 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
                        goto error;
        }
        if (pp->file) {
-               len = strlen(pp->file) - 31;
-               if (len < 0)
-                       len = 0;
-               tmp = strchr(pp->file + len, '/');
-               if (!tmp)
-                       tmp = pp->file + len;
-               ret = e_snprintf(file, 32, "@%s", tmp + 1);
+               tmp = pp->file;
+               len = strlen(tmp);
+               if (len > 30) {
+                       tmp = strchr(pp->file + len - 30, '/');
+                       tmp = tmp ? tmp + 1 : pp->file + len - 30;
+               }
+               ret = e_snprintf(file, 32, "@%s", tmp);
                if (ret <= 0)
                        goto error;
        }
@@ -1055,7 +1099,7 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
 
        return buf;
 error:
-       pr_debug("Failed to synthesize perf probe point: %s",
+       pr_debug("Failed to synthesize perf probe point: %s\n",
                 strerror(-ret));
        if (buf)
                free(buf);
@@ -1796,7 +1840,7 @@ static int del_trace_probe_event(int fd, const char *group,
 
        ret = e_snprintf(buf, 128, "%s:%s", group, event);
        if (ret < 0) {
-               pr_err("Failed to copy event.");
+               pr_err("Failed to copy event.\n");
                return ret;
        }
 
index ddf4d45563218ad9e2d8971d32f2a8f25097c502..ab83b6ac5d657c80af1e67790c1ace15d7592578 100644 (file)
@@ -652,8 +652,8 @@ static_var:
        regs = get_arch_regstr(regn);
        if (!regs) {
                /* This should be a bug in DWARF or this tool */
-               pr_warning("Mapping for DWARF register number %u "
-                          "missing on this architecture.", regn);
+               pr_warning("Mapping for the register number %u "
+                          "missing on this architecture.\n", regn);
                return -ERANGE;
        }
 
@@ -699,13 +699,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                if (ret != DW_TAG_pointer_type &&
                    ret != DW_TAG_array_type) {
                        pr_warning("Failed to cast into string: "
-                                  "%s(%s) is not a pointer nor array.",
+                                  "%s(%s) is not a pointer nor array.\n",
                                   dwarf_diename(vr_die), dwarf_diename(&type));
                        return -EINVAL;
                }
                if (ret == DW_TAG_pointer_type) {
                        if (die_get_real_type(&type, &type) == NULL) {
-                               pr_warning("Failed to get a type information.");
+                               pr_warning("Failed to get a type"
+                                          " information.\n");
                                return -ENOENT;
                        }
                        while (*ref_ptr)
@@ -720,7 +721,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                if (!die_compare_name(&type, "char") &&
                    !die_compare_name(&type, "unsigned char")) {
                        pr_warning("Failed to cast into string: "
-                                  "%s is not (unsigned) char *.",
+                                  "%s is not (unsigned) char *.\n",
                                   dwarf_diename(vr_die));
                        return -EINVAL;
                }
@@ -830,8 +831,8 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                        return -EINVAL;
                }
                if (field->name[0] == '[') {
-                       pr_err("Semantic error: %s is not a pointor nor array.",
-                              varname);
+                       pr_err("Semantic error: %s is not a pointor"
+                              " nor array.\n", varname);
                        return -EINVAL;
                }
                if (field->ref) {
@@ -978,7 +979,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
        name = dwarf_diename(sp_die);
        if (name) {
                if (dwarf_entrypc(sp_die, &eaddr) != 0) {
-                       pr_warning("Failed to get entry pc of %s\n",
+                       pr_warning("Failed to get entry address of %s\n",
                                   dwarf_diename(sp_die));
                        return -ENOENT;
                }
@@ -994,7 +995,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
        if (retprobe) {
                if (eaddr != paddr) {
                        pr_warning("Return probe must be on the head of"
-                                  " a real function\n");
+                                  " a real function.\n");
                        return -EINVAL;
                }
                tp->retprobe = true;
@@ -1033,7 +1034,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
                Dwarf_Frame *frame;
                if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
                    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
-                       pr_warning("Failed to get CFA on 0x%jx\n",
+                       pr_warning("Failed to get call frame on 0x%jx\n",
                                   (uintmax_t)pf->addr);
                        return -ENOENT;
                }
@@ -1060,7 +1061,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)
        int ret = 0;
 
        if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1162,7 +1163,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
        }
 
        if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1220,7 +1221,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
        else {
                /* Get probe address */
                if (dwarf_entrypc(in_die, &addr) != 0) {
-                       pr_warning("Failed to get entry pc of %s.\n",
+                       pr_warning("Failed to get entry address of %s.\n",
                                   dwarf_diename(in_die));
                        param->retval = -ENOENT;
                        return DWARF_CB_ABORT;
@@ -1261,8 +1262,8 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
                        param->retval = find_probe_point_lazy(sp_die, pf);
                else {
                        if (dwarf_entrypc(sp_die, &pf->addr) != 0) {
-                               pr_warning("Failed to get entry pc of %s.\n",
-                                          dwarf_diename(sp_die));
+                               pr_warning("Failed to get entry address of "
+                                          "%s.\n", dwarf_diename(sp_die));
                                param->retval = -ENOENT;
                                return DWARF_CB_ABORT;
                        }
@@ -1304,7 +1305,7 @@ static int find_probes(int fd, struct probe_finder *pf)
 
        dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                return -EBADF;
        }
@@ -1549,7 +1550,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
        /* Open the live linux kernel */
        dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                ret = -EINVAL;
                goto end;
@@ -1559,7 +1560,8 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
        addr += bias;
        /* Find cu die */
        if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
-               pr_warning("No CU DIE is found at %lx\n", addr);
+               pr_warning("Failed to find debug information for address %lx\n",
+                          addr);
                ret = -EINVAL;
                goto end;
        }
@@ -1684,7 +1686,7 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
 
        line_list__init(&lf->lr->line_list);
        if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1809,7 +1811,7 @@ int find_line_range(int fd, struct line_range *lr)
 
        dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                return -EBADF;
        }
index bba69d4556999e5081b018857acd230de6740eea..beaefc3c1223df16423cbd084f700b6ee5a5fbf2 100644 (file)
@@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
                                  bool externs);
 
 #include <dwarf.h>
-#include <libdw.h>
-#include <libdwfl.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
 
 struct probe_finder {
        struct perf_probe_event *pev;           /* Target probe event */
index b059dc50cc2db9021b75435e9aac132174c6dbec..93680818e244ca8a2e58f49e59af0ca729eeb50e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * trace-event-perl.  Feed perf trace events to an embedded Perl interpreter.
+ * trace-event-perl.  Feed perf script events to an embedded Perl interpreter.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
@@ -411,8 +411,8 @@ static int perl_generate_script(const char *outfile)
                return -1;
        }
 
-       fprintf(ofp, "# perf trace event handlers, "
-               "generated by perf trace -g perl\n");
+       fprintf(ofp, "# perf script event handlers, "
+               "generated by perf script -g perl\n");
 
        fprintf(ofp, "# Licensed under the terms of the GNU GPL"
                " License version 2\n\n");
index 33a632523743deff80cb9e3636bd9144b84b2f76..c6d99334bdfa836c1adba2d613b658fadf20797b 100644 (file)
@@ -442,8 +442,8 @@ static int python_generate_script(const char *outfile)
                fprintf(stderr, "couldn't open %s\n", fname);
                return -1;
        }
-       fprintf(ofp, "# perf trace event handlers, "
-               "generated by perf trace -g python\n");
+       fprintf(ofp, "# perf script event handlers, "
+               "generated by perf script -g python\n");
 
        fprintf(ofp, "# Licensed under the terms of the GNU GPL"
                " License version 2\n\n");
index fa9d652c2dc3c07182028d4a196293333be75d78..0f7e544544f569c3246f44e3379bd4453dca119b 100644 (file)
@@ -65,9 +65,49 @@ out_close:
        return -1;
 }
 
+static void perf_session__id_header_size(struct perf_session *session)
+{
+       struct sample_data *data;
+       u64 sample_type = session->sample_type;
+       u16 size = 0;
+
+       if (!session->sample_id_all)
+               goto out;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               size += sizeof(data->tid) * 2;
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               size += sizeof(data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               size += sizeof(data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               size += sizeof(data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               size += sizeof(data->cpu) * 2;
+out:
+       session->id_hdr_size = size;
+}
+
+void perf_session__set_sample_id_all(struct perf_session *session, bool value)
+{
+       session->sample_id_all = value;
+       perf_session__id_header_size(session);
+}
+
+void perf_session__set_sample_type(struct perf_session *session, u64 type)
+{
+       session->sample_type = type;
+}
+
 void perf_session__update_sample_type(struct perf_session *self)
 {
        self->sample_type = perf_header__sample_type(&self->header);
+       self->sample_id_all = perf_header__sample_id_all(&self->header);
+       perf_session__id_header_size(self);
 }
 
 int perf_session__create_kernel_maps(struct perf_session *self)
@@ -85,7 +125,9 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self)
        machines__destroy_guest_kernel_maps(&self->machines);
 }
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
+struct perf_session *perf_session__new(const char *filename, int mode,
+                                      bool force, bool repipe,
+                                      struct perf_event_ops *ops)
 {
        size_t len = filename ? strlen(filename) + 1 : 0;
        struct perf_session *self = zalloc(sizeof(*self) + len);
@@ -101,10 +143,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
        INIT_LIST_HEAD(&self->dead_threads);
        self->hists_tree = RB_ROOT;
        self->last_match = NULL;
-       self->mmap_window = 32;
+       /*
+        * On 64bit we can mmap the data file in one go. No need for tiny mmap
+        * slices. On 32bit we use 32MB.
+        */
+#if BITS_PER_LONG == 64
+       self->mmap_window = ULLONG_MAX;
+#else
+       self->mmap_window = 32 * 1024 * 1024ULL;
+#endif
        self->machines = RB_ROOT;
        self->repipe = repipe;
-       INIT_LIST_HEAD(&self->ordered_samples.samples_head);
+       INIT_LIST_HEAD(&self->ordered_samples.samples);
+       INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
+       INIT_LIST_HEAD(&self->ordered_samples.to_free);
        machine__init(&self->host_machine, "", HOST_KERNEL_ID);
 
        if (mode == O_RDONLY) {
@@ -120,6 +172,13 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
        }
 
        perf_session__update_sample_type(self);
+
+       if (ops && ops->ordering_requires_timestamps &&
+           ops->ordered_samples && !self->sample_id_all) {
+               dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
+               ops->ordered_samples = false;
+       }
+
 out:
        return self;
 out_free:
@@ -230,7 +289,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
        return syms;
 }
 
+static int process_event_synth_stub(event_t *event __used,
+                                   struct perf_session *session __used)
+{
+       dump_printf(": unhandled!\n");
+       return 0;
+}
+
 static int process_event_stub(event_t *event __used,
+                             struct sample_data *sample __used,
                              struct perf_session *session __used)
 {
        dump_printf(": unhandled!\n");
@@ -262,7 +329,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
        if (handler->exit == NULL)
                handler->exit = process_event_stub;
        if (handler->lost == NULL)
-               handler->lost = process_event_stub;
+               handler->lost = event__process_lost;
        if (handler->read == NULL)
                handler->read = process_event_stub;
        if (handler->throttle == NULL)
@@ -270,13 +337,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
        if (handler->unthrottle == NULL)
                handler->unthrottle = process_event_stub;
        if (handler->attr == NULL)
-               handler->attr = process_event_stub;
+               handler->attr = process_event_synth_stub;
        if (handler->event_type == NULL)
-               handler->event_type = process_event_stub;
+               handler->event_type = process_event_synth_stub;
        if (handler->tracing_data == NULL)
-               handler->tracing_data = process_event_stub;
+               handler->tracing_data = process_event_synth_stub;
        if (handler->build_id == NULL)
-               handler->build_id = process_event_stub;
+               handler->build_id = process_event_synth_stub;
        if (handler->finished_round == NULL) {
                if (handler->ordered_samples)
                        handler->finished_round = process_finished_round;
@@ -386,33 +453,61 @@ static event__swap_op event__swap_ops[] = {
 
 struct sample_queue {
        u64                     timestamp;
-       struct sample_event     *event;
+       u64                     file_offset;
+       event_t                 *event;
        struct list_head        list;
 };
 
+static void perf_session_free_sample_buffers(struct perf_session *session)
+{
+       struct ordered_samples *os = &session->ordered_samples;
+
+       while (!list_empty(&os->to_free)) {
+               struct sample_queue *sq;
+
+               sq = list_entry(os->to_free.next, struct sample_queue, list);
+               list_del(&sq->list);
+               free(sq);
+       }
+}
+
+static int perf_session_deliver_event(struct perf_session *session,
+                                     event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_event_ops *ops,
+                                     u64 file_offset);
+
 static void flush_sample_queue(struct perf_session *s,
                               struct perf_event_ops *ops)
 {
-       struct list_head *head = &s->ordered_samples.samples_head;
-       u64 limit = s->ordered_samples.next_flush;
+       struct ordered_samples *os = &s->ordered_samples;
+       struct list_head *head = &os->samples;
        struct sample_queue *tmp, *iter;
+       struct sample_data sample;
+       u64 limit = os->next_flush;
+       u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
 
        if (!ops->ordered_samples || !limit)
                return;
 
        list_for_each_entry_safe(iter, tmp, head, list) {
                if (iter->timestamp > limit)
-                       return;
-
-               if (iter == s->ordered_samples.last_inserted)
-                       s->ordered_samples.last_inserted = NULL;
+                       break;
 
-               ops->sample((event_t *)iter->event, s);
+               event__parse_sample(iter->event, s, &sample);
+               perf_session_deliver_event(s, iter->event, &sample, ops,
+                                          iter->file_offset);
 
-               s->ordered_samples.last_flush = iter->timestamp;
+               os->last_flush = iter->timestamp;
                list_del(&iter->list);
-               free(iter->event);
-               free(iter);
+               list_add(&iter->list, &os->sample_cache);
+       }
+
+       if (list_empty(head)) {
+               os->last_sample = NULL;
+       } else if (last_ts <= limit) {
+               os->last_sample =
+                       list_entry(head->prev, struct sample_queue, list);
        }
 }
 
@@ -465,176 +560,263 @@ static int process_finished_round(event_t *event __used,
        return 0;
 }
 
-static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
-{
-       struct sample_queue *iter;
-
-       list_for_each_entry_reverse(iter, head, list) {
-               if (iter->timestamp < new->timestamp) {
-                       list_add(&new->list, &iter->list);
-                       return;
-               }
-       }
-
-       list_add(&new->list, head);
-}
-
-static void __queue_sample_before(struct sample_queue *new,
-                                 struct sample_queue *iter,
-                                 struct list_head *head)
-{
-       list_for_each_entry_continue_reverse(iter, head, list) {
-               if (iter->timestamp < new->timestamp) {
-                       list_add(&new->list, &iter->list);
-                       return;
-               }
-       }
-
-       list_add(&new->list, head);
-}
-
-static void __queue_sample_after(struct sample_queue *new,
-                                struct sample_queue *iter,
-                                struct list_head *head)
-{
-       list_for_each_entry_continue(iter, head, list) {
-               if (iter->timestamp > new->timestamp) {
-                       list_add_tail(&new->list, &iter->list);
-                       return;
-               }
-       }
-       list_add_tail(&new->list, head);
-}
-
 /* The queue is ordered by time */
-static void __queue_sample_event(struct sample_queue *new,
-                                struct perf_session *s)
+static void __queue_event(struct sample_queue *new, struct perf_session *s)
 {
-       struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
-       struct list_head *head = &s->ordered_samples.samples_head;
+       struct ordered_samples *os = &s->ordered_samples;
+       struct sample_queue *sample = os->last_sample;
+       u64 timestamp = new->timestamp;
+       struct list_head *p;
 
+       os->last_sample = new;
 
-       if (!last_inserted) {
-               __queue_sample_end(new, head);
+       if (!sample) {
+               list_add(&new->list, &os->samples);
+               os->max_timestamp = timestamp;
                return;
        }
 
        /*
-        * Most of the time the current event has a timestamp
-        * very close to the last event inserted, unless we just switched
-        * to another event buffer. Having a sorting based on a list and
-        * on the last inserted event that is close to the current one is
-        * probably more efficient than an rbtree based sorting.
+        * last_sample might point to some random place in the list as it's
+        * the last queued event. We expect that the new event is close to
+        * this.
         */
-       if (last_inserted->timestamp >= new->timestamp)
-               __queue_sample_before(new, last_inserted, head);
-       else
-               __queue_sample_after(new, last_inserted, head);
+       if (sample->timestamp <= timestamp) {
+               while (sample->timestamp <= timestamp) {
+                       p = sample->list.next;
+                       if (p == &os->samples) {
+                               list_add_tail(&new->list, &os->samples);
+                               os->max_timestamp = timestamp;
+                               return;
+                       }
+                       sample = list_entry(p, struct sample_queue, list);
+               }
+               list_add_tail(&new->list, &sample->list);
+       } else {
+               while (sample->timestamp > timestamp) {
+                       p = sample->list.prev;
+                       if (p == &os->samples) {
+                               list_add(&new->list, &os->samples);
+                               return;
+                       }
+                       sample = list_entry(p, struct sample_queue, list);
+               }
+               list_add(&new->list, &sample->list);
+       }
 }
 
-static int queue_sample_event(event_t *event, struct sample_data *data,
-                             struct perf_session *s)
+#define MAX_SAMPLE_BUFFER      (64 * 1024 / sizeof(struct sample_queue))
+
+static int perf_session_queue_event(struct perf_session *s, event_t *event,
+                                   struct sample_data *data, u64 file_offset)
 {
+       struct ordered_samples *os = &s->ordered_samples;
+       struct list_head *sc = &os->sample_cache;
        u64 timestamp = data->time;
        struct sample_queue *new;
 
+       if (!timestamp || timestamp == ~0ULL)
+               return -ETIME;
 
        if (timestamp < s->ordered_samples.last_flush) {
                printf("Warning: Timestamp below last timeslice flush\n");
                return -EINVAL;
        }
 
-       new = malloc(sizeof(*new));
-       if (!new)
-               return -ENOMEM;
+       if (!list_empty(sc)) {
+               new = list_entry(sc->next, struct sample_queue, list);
+               list_del(&new->list);
+       } else if (os->sample_buffer) {
+               new = os->sample_buffer + os->sample_buffer_idx;
+               if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
+                       os->sample_buffer = NULL;
+       } else {
+               os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
+               if (!os->sample_buffer)
+                       return -ENOMEM;
+               list_add(&os->sample_buffer->list, &os->to_free);
+               os->sample_buffer_idx = 2;
+               new = os->sample_buffer + 1;
+       }
 
        new->timestamp = timestamp;
+       new->file_offset = file_offset;
+       new->event = event;
 
-       new->event = malloc(event->header.size);
-       if (!new->event) {
-               free(new);
-               return -ENOMEM;
-       }
+       __queue_event(new, s);
 
-       memcpy(new->event, event, event->header.size);
+       return 0;
+}
 
-       __queue_sample_event(new, s);
-       s->ordered_samples.last_inserted = new;
+static void callchain__printf(struct sample_data *sample)
+{
+       unsigned int i;
 
-       if (new->timestamp > s->ordered_samples.max_timestamp)
-               s->ordered_samples.max_timestamp = new->timestamp;
+       printf("... chain: nr:%Lu\n", sample->callchain->nr);
 
-       return 0;
+       for (i = 0; i < sample->callchain->nr; i++)
+               printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
 }
 
-static int perf_session__process_sample(event_t *event, struct perf_session *s,
-                                       struct perf_event_ops *ops)
+static void perf_session__print_tstamp(struct perf_session *session,
+                                      event_t *event,
+                                      struct sample_data *sample)
 {
-       struct sample_data data;
+       if (event->header.type != PERF_RECORD_SAMPLE &&
+           !session->sample_id_all) {
+               fputs("-1 -1 ", stdout);
+               return;
+       }
 
-       if (!ops->ordered_samples)
-               return ops->sample(event, s);
+       if ((session->sample_type & PERF_SAMPLE_CPU))
+               printf("%u ", sample->cpu);
 
-       bzero(&data, sizeof(struct sample_data));
-       event__parse_sample(event, s->sample_type, &data);
+       if (session->sample_type & PERF_SAMPLE_TIME)
+               printf("%Lu ", sample->time);
+}
 
-       queue_sample_event(event, &data, s);
+static void dump_event(struct perf_session *session, event_t *event,
+                      u64 file_offset, struct sample_data *sample)
+{
+       if (!dump_trace)
+               return;
 
-       return 0;
+       printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
+              event->header.type);
+
+       trace_event(event);
+
+       if (sample)
+               perf_session__print_tstamp(session, event, sample);
+
+       printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
+              event__get_event_name(event->header.type));
 }
 
-static int perf_session__process_event(struct perf_session *self,
-                                      event_t *event,
-                                      struct perf_event_ops *ops,
-                                      u64 offset, u64 head)
+static void dump_sample(struct perf_session *session, event_t *event,
+                       struct sample_data *sample)
 {
-       trace_event(event);
+       if (!dump_trace)
+               return;
 
-       if (event->header.type < PERF_RECORD_HEADER_MAX) {
-               dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
-                           offset + head, event->header.size,
-                           event__name[event->header.type]);
-               hists__inc_nr_events(&self->hists, event->header.type);
-       }
+       printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+              sample->pid, sample->tid, sample->ip, sample->period);
 
-       if (self->header.needs_swap && event__swap_ops[event->header.type])
-               event__swap_ops[event->header.type](event);
+       if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
+               callchain__printf(sample);
+}
+
+static int perf_session_deliver_event(struct perf_session *session,
+                                     event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_event_ops *ops,
+                                     u64 file_offset)
+{
+       dump_event(session, event, file_offset, sample);
 
        switch (event->header.type) {
        case PERF_RECORD_SAMPLE:
-               return perf_session__process_sample(event, self, ops);
+               dump_sample(session, event, sample);
+               return ops->sample(event, sample, session);
        case PERF_RECORD_MMAP:
-               return ops->mmap(event, self);
+               return ops->mmap(event, sample, session);
        case PERF_RECORD_COMM:
-               return ops->comm(event, self);
+               return ops->comm(event, sample, session);
        case PERF_RECORD_FORK:
-               return ops->fork(event, self);
+               return ops->fork(event, sample, session);
        case PERF_RECORD_EXIT:
-               return ops->exit(event, self);
+               return ops->exit(event, sample, session);
        case PERF_RECORD_LOST:
-               return ops->lost(event, self);
+               return ops->lost(event, sample, session);
        case PERF_RECORD_READ:
-               return ops->read(event, self);
+               return ops->read(event, sample, session);
        case PERF_RECORD_THROTTLE:
-               return ops->throttle(event, self);
+               return ops->throttle(event, sample, session);
        case PERF_RECORD_UNTHROTTLE:
-               return ops->unthrottle(event, self);
+               return ops->unthrottle(event, sample, session);
+       default:
+               ++session->hists.stats.nr_unknown_events;
+               return -1;
+       }
+}
+
+static int perf_session__preprocess_sample(struct perf_session *session,
+                                          event_t *event, struct sample_data *sample)
+{
+       if (event->header.type != PERF_RECORD_SAMPLE ||
+           !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
+               return 0;
+
+       if (!ip_callchain__valid(sample->callchain, event)) {
+               pr_debug("call-chain problem with event, skipping it.\n");
+               ++session->hists.stats.nr_invalid_chains;
+               session->hists.stats.total_invalid_chains += sample->period;
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int perf_session__process_user_event(struct perf_session *session, event_t *event,
+                                           struct perf_event_ops *ops, u64 file_offset)
+{
+       dump_event(session, event, file_offset, NULL);
+
+       /* These events are processed right away */
+       switch (event->header.type) {
        case PERF_RECORD_HEADER_ATTR:
-               return ops->attr(event, self);
+               return ops->attr(event, session);
        case PERF_RECORD_HEADER_EVENT_TYPE:
-               return ops->event_type(event, self);
+               return ops->event_type(event, session);
        case PERF_RECORD_HEADER_TRACING_DATA:
                /* setup for reading amidst mmap */
-               lseek(self->fd, offset + head, SEEK_SET);
-               return ops->tracing_data(event, self);
+               lseek(session->fd, file_offset, SEEK_SET);
+               return ops->tracing_data(event, session);
        case PERF_RECORD_HEADER_BUILD_ID:
-               return ops->build_id(event, self);
+               return ops->build_id(event, session);
        case PERF_RECORD_FINISHED_ROUND:
-               return ops->finished_round(event, self, ops);
+               return ops->finished_round(event, session, ops);
        default:
-               ++self->hists.stats.nr_unknown_events;
-               return -1;
+               return -EINVAL;
+       }
+}
+
+static int perf_session__process_event(struct perf_session *session,
+                                      event_t *event,
+                                      struct perf_event_ops *ops,
+                                      u64 file_offset)
+{
+       struct sample_data sample;
+       int ret;
+
+       if (session->header.needs_swap && event__swap_ops[event->header.type])
+               event__swap_ops[event->header.type](event);
+
+       if (event->header.type >= PERF_RECORD_HEADER_MAX)
+               return -EINVAL;
+
+       hists__inc_nr_events(&session->hists, event->header.type);
+
+       if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+               return perf_session__process_user_event(session, event, ops, file_offset);
+
+       /*
+        * For all kernel events we get the sample data
+        */
+       event__parse_sample(event, session, &sample);
+
+       /* Preprocess sample records - precheck callchains */
+       if (perf_session__preprocess_sample(session, event, &sample))
+               return 0;
+
+       if (ops->ordered_samples) {
+               ret = perf_session_queue_event(session, event, &sample,
+                                              file_offset);
+               if (ret != -ETIME)
+                       return ret;
        }
+
+       return perf_session_deliver_event(session, event, &sample, ops,
+                                         file_offset);
 }
 
 void perf_event_header__bswap(struct perf_event_header *self)
@@ -724,8 +906,7 @@ more:
        }
 
        if (size == 0 ||
-           (skip = perf_session__process_event(self, &event, ops,
-                                               0, head)) < 0) {
+           (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
                dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
                            head, event.header.size, event.header.type);
                /*
@@ -740,9 +921,6 @@ more:
 
        head += size;
 
-       dump_printf("\n%#Lx [%#x]: event: %d\n",
-                   head, event.header.size, event.header.type);
-
        if (skip > 0)
                head += skip;
 
@@ -751,82 +929,90 @@ more:
 done:
        err = 0;
 out_err:
+       perf_session_free_sample_buffers(self);
        return err;
 }
 
-int __perf_session__process_events(struct perf_session *self,
+int __perf_session__process_events(struct perf_session *session,
                                   u64 data_offset, u64 data_size,
                                   u64 file_size, struct perf_event_ops *ops)
 {
-       int err, mmap_prot, mmap_flags;
-       u64 head, shift;
-       u64 offset = 0;
-       size_t  page_size;
+       u64 head, page_offset, file_offset, file_pos, progress_next;
+       int err, mmap_prot, mmap_flags, map_idx = 0;
+       struct ui_progress *progress;
+       size_t  page_size, mmap_size;
+       char *buf, *mmaps[8];
        event_t *event;
        uint32_t size;
-       char *buf;
-       struct ui_progress *progress = ui_progress__new("Processing events...",
-                                                       self->size);
-       if (progress == NULL)
-               return -1;
 
        perf_event_ops__fill_defaults(ops);
 
        page_size = sysconf(_SC_PAGESIZE);
 
-       head = data_offset;
-       shift = page_size * (head / page_size);
-       offset += shift;
-       head -= shift;
+       page_offset = page_size * (data_offset / page_size);
+       file_offset = page_offset;
+       head = data_offset - page_offset;
+
+       if (data_offset + data_size < file_size)
+               file_size = data_offset + data_size;
+
+       progress_next = file_size / 16;
+       progress = ui_progress__new("Processing events...", file_size);
+       if (progress == NULL)
+               return -1;
+
+       mmap_size = session->mmap_window;
+       if (mmap_size > file_size)
+               mmap_size = file_size;
+
+       memset(mmaps, 0, sizeof(mmaps));
 
        mmap_prot  = PROT_READ;
        mmap_flags = MAP_SHARED;
 
-       if (self->header.needs_swap) {
+       if (session->header.needs_swap) {
                mmap_prot  |= PROT_WRITE;
                mmap_flags = MAP_PRIVATE;
        }
 remap:
-       buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
-                  mmap_flags, self->fd, offset);
+       buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
+                  file_offset);
        if (buf == MAP_FAILED) {
                pr_err("failed to mmap file\n");
                err = -errno;
                goto out_err;
        }
+       mmaps[map_idx] = buf;
+       map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
+       file_pos = file_offset + head;
 
 more:
        event = (event_t *)(buf + head);
-       ui_progress__update(progress, offset);
 
-       if (self->header.needs_swap)
+       if (session->header.needs_swap)
                perf_event_header__bswap(&event->header);
        size = event->header.size;
        if (size == 0)
                size = 8;
 
-       if (head + event->header.size >= page_size * self->mmap_window) {
-               int munmap_ret;
-
-               shift = page_size * (head / page_size);
-
-               munmap_ret = munmap(buf, page_size * self->mmap_window);
-               assert(munmap_ret == 0);
+       if (head + event->header.size >= mmap_size) {
+               if (mmaps[map_idx]) {
+                       munmap(mmaps[map_idx], mmap_size);
+                       mmaps[map_idx] = NULL;
+               }
 
-               offset += shift;
-               head -= shift;
+               page_offset = page_size * (head / page_size);
+               file_offset += page_offset;
+               head -= page_offset;
                goto remap;
        }
 
        size = event->header.size;
 
-       dump_printf("\n%#Lx [%#x]: event: %d\n",
-                   offset + head, event->header.size, event->header.type);
-
        if (size == 0 ||
-           perf_session__process_event(self, event, ops, offset, head) < 0) {
+           perf_session__process_event(session, event, ops, file_pos) < 0) {
                dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
-                           offset + head, event->header.size,
+                           file_offset + head, event->header.size,
                            event->header.type);
                /*
                 * assume we lost track of the stream, check alignment, and
@@ -839,19 +1025,49 @@ more:
        }
 
        head += size;
+       file_pos += size;
 
-       if (offset + head >= data_offset + data_size)
-               goto done;
+       if (file_pos >= progress_next) {
+               progress_next += file_size / 16;
+               ui_progress__update(progress, file_pos);
+       }
 
-       if (offset + head < file_size)
+       if (file_pos < file_size)
                goto more;
-done:
+
        err = 0;
        /* do the final flush for ordered samples */
-       self->ordered_samples.next_flush = ULLONG_MAX;
-       flush_sample_queue(self, ops);
+       session->ordered_samples.next_flush = ULLONG_MAX;
+       flush_sample_queue(session, ops);
 out_err:
        ui_progress__delete(progress);
+
+       if (ops->lost == event__process_lost &&
+           session->hists.stats.total_lost != 0) {
+               ui__warning("Processed %Lu events and LOST %Lu!\n\n"
+                           "Check IO/CPU overload!\n\n",
+                           session->hists.stats.total_period,
+                           session->hists.stats.total_lost);
+       }
+
+       if (session->hists.stats.nr_unknown_events != 0) {
+               ui__warning("Found %u unknown events!\n\n"
+                           "Is this an older tool processing a perf.data "
+                           "file generated by a more recent tool?\n\n"
+                           "If that is not the case, consider "
+                           "reporting to linux-kernel@vger.kernel.org.\n\n",
+                           session->hists.stats.nr_unknown_events);
+       }
+
+       if (session->hists.stats.nr_invalid_chains != 0) {
+               ui__warning("Found invalid callchains!\n\n"
+                           "%u out of %u events were discarded for this reason.\n\n"
+                           "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+                           session->hists.stats.nr_invalid_chains,
+                           session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
+       }
+
+       perf_session_free_sample_buffers(session);
        return err;
 }
 
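A self-contained sketch of the ordering idea behind __queue_event() above, using hand-rolled list nodes instead of struct list_head: the queue stays sorted by timestamp, and each insertion starts walking from the previously inserted node, since consecutive events are usually close in time:

#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned long long ts;
	struct node *prev, *next;
};

static struct node head = { 0, &head, &head };	/* circular sentinel */
static struct node *last;			/* last inserted node */

static void insert_after(struct node *new, struct node *pos)
{
	new->prev = pos;
	new->next = pos->next;
	pos->next->prev = new;
	pos->next = new;
}

static void queue(struct node *new)
{
	struct node *pos = last;

	last = new;
	if (!pos) {
		insert_after(new, &head);
		return;
	}
	if (pos->ts <= new->ts) {
		/* walk forward until a later timestamp (or the end) */
		while (pos != &head && pos->ts <= new->ts)
			pos = pos->next;
		insert_after(new, pos->prev);
	} else {
		/* walk backward until an earlier-or-equal timestamp */
		while (pos != &head && pos->ts > new->ts)
			pos = pos->prev;
		insert_after(new, pos);
	}
}

int main(void)
{
	unsigned long long ts[] = { 10, 12, 11, 30, 25 };
	struct node *n;
	unsigned int i;

	for (i = 0; i < sizeof(ts) / sizeof(ts[0]); i++) {
		n = calloc(1, sizeof(*n));
		n->ts = ts[i];
		queue(n);
	}
	for (n = head.next; n != &head; n = n->next)
		printf("%llu ", n->ts);	/* 10 11 12 25 30 */
	printf("\n");
	return 0;
}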
index 9fa0fc2a863f1259caf8caf7494f867d0737a297..ffe4b98db8f0883b3071dbc6a63a7cb50301da90 100644 (file)
@@ -17,8 +17,12 @@ struct ordered_samples {
        u64                     last_flush;
        u64                     next_flush;
        u64                     max_timestamp;
-       struct list_head        samples_head;
-       struct sample_queue     *last_inserted;
+       struct list_head        samples;
+       struct list_head        sample_cache;
+       struct list_head        to_free;
+       struct sample_queue     *sample_buffer;
+       struct sample_queue     *last_sample;
+       int                     sample_buffer_idx;
 };
 
 struct perf_session {
@@ -42,6 +46,8 @@ struct perf_session {
        int                     fd;
        bool                    fd_pipe;
        bool                    repipe;
+       bool                    sample_id_all;
+       u16                     id_hdr_size;
        int                     cwdlen;
        char                    *cwd;
        struct ordered_samples  ordered_samples;
@@ -50,7 +56,9 @@ struct perf_session {
 
 struct perf_event_ops;
 
-typedef int (*event_op)(event_t *self, struct perf_session *session);
+typedef int (*event_op)(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
 typedef int (*event_op2)(event_t *self, struct perf_session *session,
                         struct perf_event_ops *ops);
 
@@ -63,16 +71,19 @@ struct perf_event_ops {
                        lost,
                        read,
                        throttle,
-                       unthrottle,
-                       attr,
+                       unthrottle;
+       event_synth_op  attr,
                        event_type,
                        tracing_data,
                        build_id;
        event_op2       finished_round;
        bool            ordered_samples;
+       bool            ordering_requires_timestamps;
 };
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe);
+struct perf_session *perf_session__new(const char *filename, int mode,
+                                      bool force, bool repipe,
+                                      struct perf_event_ops *ops);
 void perf_session__delete(struct perf_session *self);
 
 void perf_event_header__bswap(struct perf_event_header *self);
@@ -100,6 +111,8 @@ int perf_session__create_kernel_maps(struct perf_session *self);
 
 int do_read(int fd, void *buf, size_t size);
 void perf_session__update_sample_type(struct perf_session *self);
+void perf_session__set_sample_id_all(struct perf_session *session, bool value);
+void perf_session__set_sample_type(struct perf_session *session, u64 type);
 void perf_session__remove_thread(struct perf_session *self, struct thread *th);
 
 static inline
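For reference, a sketch of how the new id_hdr_size is derived from the sample_type mask; the SAMPLE_* values below are local stand-ins for the kernel's PERF_SAMPLE_* bits, not the ABI constants:

#include <stdio.h>
#include <stdint.h>

#define SAMPLE_TID		(1U << 0)	/* illustrative values only */
#define SAMPLE_TIME		(1U << 1)
#define SAMPLE_ID		(1U << 2)
#define SAMPLE_STREAM_ID	(1U << 3)
#define SAMPLE_CPU		(1U << 4)

/* Size of the optional id trailer carried by non-sample records. */
static uint16_t id_header_size(uint64_t sample_type, int sample_id_all)
{
	uint16_t size = 0;

	if (!sample_id_all)
		return 0;
	if (sample_type & SAMPLE_TID)
		size += 2 * sizeof(uint32_t);	/* pid + tid */
	if (sample_type & SAMPLE_TIME)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_ID)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_STREAM_ID)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_CPU)
		size += 2 * sizeof(uint32_t);	/* cpu + reserved */
	return size;
}

int main(void)
{
	printf("%u bytes\n",
	       (unsigned)id_header_size(SAMPLE_TID | SAMPLE_TIME, 1));	/* 16 bytes */
	return 0;
}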
index b62a553cc67d969c104638b9bf33922f351f06bb..f44fa541d56e67c6bb6c976e78123e99657ffbee 100644 (file)
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
                return repsep_snprintf(bf, size, "%-*s", width, dso_name);
        }
 
-       return repsep_snprintf(bf, size, "%*Lx", width, self->ip);
+       return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
 }
 
 /* --sort symbol */
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
 
        if (verbose) {
                char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
-               ret += repsep_snprintf(bf, size, "%*Lx %c ",
+               ret += repsep_snprintf(bf, size, "%-#*llx %c ",
                                       BITS_PER_LONG / 4, self->ip, o);
        }
 
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
                ret += repsep_snprintf(bf + ret, size - ret, "%s",
                                       self->ms.sym->name);
        else
-               ret += repsep_snprintf(bf + ret, size - ret, "%*Lx",
+               ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
                                       BITS_PER_LONG / 4, self->ip);
 
        return ret;
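A tiny demo of the "%-#*llx" conversion used above, with plain printf standing in for repsep_snprintf: left-aligned, 0x-prefixed hex, the field width passed as an argument:

#include <stdio.h>

int main(void)
{
	unsigned long long ip = 0xc0100000ULL;

	/* width 16 plays the role of BITS_PER_LONG / 4 */
	printf("[%-#*llx]\n", 16, ip);	/* prints "[0xc0100000      ]" */
	return 0;
}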
index 439ab947daf4af9da352caac290d372ce23846ac..15ccfba8cdf805111d56b1e7f3bf71431ef2c1e4 100644 (file)
 #include <limits.h>
 #include <sys/utsname.h>
 
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 128
+#endif
+
 #ifndef NT_GNU_BUILD_ID
 #define NT_GNU_BUILD_ID 3
 #endif
@@ -41,6 +45,7 @@ struct symbol_conf symbol_conf = {
        .exclude_other    = true,
        .use_modules      = true,
        .try_vmlinux_path = true,
+       .symfs            = "",
 };
 
 int dso__name_len(const struct dso *self)
@@ -92,7 +97,7 @@ static void symbols__fixup_end(struct rb_root *self)
                prev = curr;
                curr = rb_entry(nd, struct symbol, rb_node);
 
-               if (prev->end == prev->start)
+               if (prev->end == prev->start && prev->end != curr->start)
                        prev->end = curr->start - 1;
        }
 
@@ -121,7 +126,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
         * We still haven't the actual symbols, so guess the
         * last map final address.
         */
-       curr->end = ~0UL;
+       curr->end = ~0ULL;
 }
 
 static void map_groups__fixup_end(struct map_groups *self)
@@ -425,16 +430,25 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
 
 int kallsyms__parse(const char *filename, void *arg,
                    int (*process_symbol)(void *arg, const char *name,
-                                                    char type, u64 start))
+                                         char type, u64 start, u64 end))
 {
        char *line = NULL;
        size_t n;
-       int err = 0;
+       int err = -1;
+       u64 prev_start = 0;
+       char prev_symbol_type = 0;
+       char *prev_symbol_name;
        FILE *file = fopen(filename, "r");
 
        if (file == NULL)
                goto out_failure;
 
+       prev_symbol_name = malloc(KSYM_NAME_LEN);
+       if (prev_symbol_name == NULL)
+               goto out_close;
+
+       err = 0;
+
        while (!feof(file)) {
                u64 start;
                int line_len, len;
@@ -454,14 +468,33 @@ int kallsyms__parse(const char *filename, void *arg,
                        continue;
 
                symbol_type = toupper(line[len]);
-               symbol_name = line + len + 2;
+               len += 2;
+               symbol_name = line + len;
+               len = line_len - len;
 
-               err = process_symbol(arg, symbol_name, symbol_type, start);
-               if (err)
+               if (len >= KSYM_NAME_LEN) {
+                       err = -1;
                        break;
+               }
+
+               if (prev_symbol_type) {
+                       u64 end = start;
+                       if (end != prev_start)
+                               --end;
+                       err = process_symbol(arg, prev_symbol_name,
+                                            prev_symbol_type, prev_start, end);
+                       if (err)
+                               break;
+               }
+
+               memcpy(prev_symbol_name, symbol_name, len + 1);
+               prev_symbol_type = symbol_type;
+               prev_start = start;
        }
 
+       free(prev_symbol_name);
        free(line);
+out_close:
        fclose(file);
        return err;
 
@@ -483,7 +516,7 @@ static u8 kallsyms2elf_type(char type)
 }
 
 static int map__process_kallsym_symbol(void *arg, const char *name,
-                                      char type, u64 start)
+                                      char type, u64 start, u64 end)
 {
        struct symbol *sym;
        struct process_kallsyms_args *a = arg;
@@ -492,11 +525,8 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
        if (!symbol_type__is_a(type, a->map->type))
                return 0;
 
-       /*
-        * Will fix up the end later, when we have all symbols sorted.
-        */
-       sym = symbol__new(start, 0, kallsyms2elf_type(type), name);
-
+       sym = symbol__new(start, end - start + 1,
+                         kallsyms2elf_type(type), name);
        if (sym == NULL)
                return -ENOMEM;
        /*
@@ -649,7 +679,6 @@ int dso__load_kallsyms(struct dso *self, const char *filename,
        if (dso__load_all_kallsyms(self, filename, map) < 0)
                return -1;
 
-       symbols__fixup_end(&self->symbols[map->type]);
        if (self->kernel == DSO_TYPE_GUEST_KERNEL)
                self->origin = DSO__ORIG_GUEST_KERNEL;
        else
@@ -839,8 +868,11 @@ static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
        char sympltname[1024];
        Elf *elf;
        int nr = 0, symidx, fd, err = 0;
+       char name[PATH_MAX];
 
-       fd = open(self->long_name, O_RDONLY);
+       snprintf(name, sizeof(name), "%s%s",
+                symbol_conf.symfs, self->long_name);
+       fd = open(name, O_RDONLY);
        if (fd < 0)
                goto out;
 
@@ -1452,16 +1484,19 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
             self->origin++) {
                switch (self->origin) {
                case DSO__ORIG_BUILD_ID_CACHE:
-                       if (dso__build_id_filename(self, name, size) == NULL)
+                       /* skip the locally configured cache if a symfs is given */
+                       if (symbol_conf.symfs[0] ||
+                           (dso__build_id_filename(self, name, size) == NULL)) {
                                continue;
+                       }
                        break;
                case DSO__ORIG_FEDORA:
-                       snprintf(name, size, "/usr/lib/debug%s.debug",
-                                self->long_name);
+                       snprintf(name, size, "%s/usr/lib/debug%s.debug",
+                                symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_UBUNTU:
-                       snprintf(name, size, "/usr/lib/debug%s",
-                                self->long_name);
+                       snprintf(name, size, "%s/usr/lib/debug%s",
+                                symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_BUILDID: {
                        char build_id_hex[BUILD_ID_SIZE * 2 + 1];
@@ -1473,19 +1508,26 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
                                          sizeof(self->build_id),
                                          build_id_hex);
                        snprintf(name, size,
-                                "/usr/lib/debug/.build-id/%.2s/%s.debug",
-                                build_id_hex, build_id_hex + 2);
+                                "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
+                                symbol_conf.symfs, build_id_hex, build_id_hex + 2);
                        }
                        break;
                case DSO__ORIG_DSO:
-                       snprintf(name, size, "%s", self->long_name);
+                       snprintf(name, size, "%s%s",
+                            symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_GUEST_KMODULE:
                        if (map->groups && map->groups->machine)
                                root_dir = map->groups->machine->root_dir;
                        else
                                root_dir = "";
-                       snprintf(name, size, "%s%s", root_dir, self->long_name);
+                       snprintf(name, size, "%s%s%s", symbol_conf.symfs,
+                                root_dir, self->long_name);
+                       break;
+
+               case DSO__ORIG_KMODULE:
+                       snprintf(name, size, "%s%s", symbol_conf.symfs,
+                                self->long_name);
                        break;
 
                default:
@@ -1784,17 +1826,20 @@ int dso__load_vmlinux(struct dso *self, struct map *map,
                      const char *vmlinux, symbol_filter_t filter)
 {
        int err = -1, fd;
+       char symfs_vmlinux[PATH_MAX];
 
-       fd = open(vmlinux, O_RDONLY);
+       snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s",
+                symbol_conf.symfs, vmlinux);
+       fd = open(symfs_vmlinux, O_RDONLY);
        if (fd < 0)
                return -1;
 
        dso__set_loaded(self, map->type);
-       err = dso__load_sym(self, map, vmlinux, fd, filter, 0, 0);
+       err = dso__load_sym(self, map, symfs_vmlinux, fd, filter, 0, 0);
        close(fd);
 
        if (err > 0)
-               pr_debug("Using %s for symbols\n", vmlinux);
+               pr_debug("Using %s for symbols\n", symfs_vmlinux);
 
        return err;
 }
@@ -1836,8 +1881,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
        const char *kallsyms_filename = NULL;
        char *kallsyms_allocated_filename = NULL;
        /*
-        * Step 1: if the user specified a vmlinux filename, use it and only
-        * it, reporting errors to the user if it cannot be used.
+        * Step 1: if the user specified a kallsyms or vmlinux filename, use
+        * it and only it, reporting errors to the user if it cannot be used.
         *
         * For instance, try to analyse an ARM perf.data file _without_ a
         * build-id, or if the user specifies the wrong path to the right
@@ -1850,6 +1895,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
         * validation in dso__load_vmlinux and will bail out if they don't
         * match.
         */
+       if (symbol_conf.kallsyms_name != NULL) {
+               kallsyms_filename = symbol_conf.kallsyms_name;
+               goto do_kallsyms;
+       }
+
        if (symbol_conf.vmlinux_name != NULL) {
                err = dso__load_vmlinux(self, map,
                                        symbol_conf.vmlinux_name, filter);
@@ -1867,6 +1917,10 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
                        goto out_fixup;
        }
 
+       /* do not try local files if a symfs was given */
+       if (symbol_conf.symfs[0] != 0)
+               return -1;
+
        /*
         * Say the kernel DSO was created when processing the build-id header table,
         * we have a build-id, so check if it is the same as the running kernel,
@@ -2136,7 +2190,7 @@ struct process_args {
 };
 
 static int symbol__in_kernel(void *arg, const char *name,
-                            char type __used, u64 start)
+                            char type __used, u64 start, u64 end __used)
 {
        struct process_args *args = arg;
 
@@ -2257,9 +2311,6 @@ static int vmlinux_path__init(void)
        struct utsname uts;
        char bf[PATH_MAX];
 
-       if (uname(&uts) < 0)
-               return -1;
-
        vmlinux_path = malloc(sizeof(char *) * 5);
        if (vmlinux_path == NULL)
                return -1;
@@ -2272,6 +2323,14 @@ static int vmlinux_path__init(void)
        if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
                goto out_fail;
        ++vmlinux_path__nr_entries;
+
+       /* only try running kernel version if no symfs was given */
+       if (symbol_conf.symfs[0] != 0)
+               return 0;
+
+       if (uname(&uts) < 0)
+               return -1;
+
        snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
        vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
        if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
@@ -2331,6 +2390,8 @@ static int setup_list(struct strlist **list, const char *list_str,
 
 int symbol__init(void)
 {
+       const char *symfs;
+
        if (symbol_conf.initialized)
                return 0;
 
@@ -2359,6 +2420,18 @@ int symbol__init(void)
                       symbol_conf.sym_list_str, "symbol") < 0)
                goto out_free_comm_list;
 
+       /*
+        * A path to symbols of "/" is identical to ""
+        * reset here for simplicity.
+        */
+       symfs = realpath(symbol_conf.symfs, NULL);
+       if (symfs == NULL)
+               symfs = symbol_conf.symfs;
+       if (strcmp(symfs, "/") == 0)
+               symbol_conf.symfs = "";
+       if (symfs != symbol_conf.symfs)
+               free((void *)symfs);
+
        symbol_conf.initialized = true;
        return 0;
 
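A sketch of the new end-address handling in kallsyms__parse(), with a hard-coded symbol table standing in for /proc/kallsyms (names and addresses below are illustrative): each symbol's end comes from the start of the next one, minus one unless both start at the same address:

#include <stdio.h>

struct ksym { unsigned long long start; char name[64]; };

int main(void)
{
	struct ksym syms[] = {
		{ 0xffffffff81000000ULL, "_text" },
		{ 0xffffffff81000040ULL, "startup_64" },
		{ 0xffffffff81000040ULL, "alias_of_startup" },	/* same start */
		{ 0xffffffff810000f0ULL, "secondary_startup_64" },
	};
	unsigned int i, n = sizeof(syms) / sizeof(syms[0]);

	for (i = 0; i + 1 < n; i++) {
		unsigned long long end = syms[i + 1].start;

		/* don't decrement when the next symbol starts at the same address */
		if (end != syms[i].start)
			--end;
		printf("%s: %#llx-%#llx\n", syms[i].name, syms[i].start, end);
	}
	return 0;
}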
index 6c6eafdb932dacefd29c3ea21e5bef90a80cc9a9..670cd1c88f54dc932d18b7c79d609d61e7560295 100644 (file)
@@ -72,6 +72,7 @@ struct symbol_conf {
                        show_cpu_utilization,
                        initialized;
        const char      *vmlinux_name,
+                       *kallsyms_name,
                        *source_prefix,
                        *field_sep;
        const char      *default_guest_vmlinux_name,
@@ -85,6 +86,7 @@ struct symbol_conf {
        struct strlist  *dso_list,
                        *comm_list,
                        *sym_list;
+       const char      *symfs;
 };
 
 extern struct symbol_conf symbol_conf;
@@ -215,7 +217,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 int build_id__sprintf(const u8 *self, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
                    int (*process_symbol)(void *arg, const char *name,
-                                         char type, u64 start));
+                                         char type, u64 start, u64 end));
 
 void machine__destroy_kernel_maps(struct machine *self);
 int __machine__create_kernel_maps(struct machine *self, struct dso *kernel);
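A sketch of how the new symfs prefix is applied (stand-in paths, presumably set from a --symfs style option): every image path is simply prepended with the alternate root before it is opened:

#include <stdio.h>

int main(void)
{
	const char *symfs = "/tmp/target-rootfs";	/* illustrative alternate root */
	const char *obj = "/usr/lib/debug/boot/vmlinux";
	char path[4096];

	snprintf(path, sizeof(path), "%s%s", symfs, obj);
	printf("%s\n", path);	/* /tmp/target-rootfs/usr/lib/debug/boot/vmlinux */
	return 0;
}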
index 056c69521a38098a8053d18f3e615d3a35390fd4..7b5a8926624e49be67d802fc91afc56451bead23 100644 (file)
@@ -104,10 +104,24 @@ out_destroy_form:
        return rc;
 }
 
-static const char yes[] = "Yes", no[] = "No";
+static const char yes[] = "Yes", no[] = "No",
+                 warning_str[] = "Warning!", ok[] = "Ok";
 
 bool ui__dialog_yesno(const char *msg)
 {
        /* newtWinChoice should really be accepting const char pointers... */
        return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1;
 }
+
+void ui__warning(const char *format, ...)
+{
+       va_list args;
+
+       va_start(args, format);
+       if (use_browser > 0)
+               newtWinMessagev((char *)warning_str, (char *)ok,
+                               (char *)format, args);
+       else
+               vfprintf(stderr, format, args);
+       va_end(args);
+}