git.karo-electronics.de Git - linux-beck.git/commitdiff
Merge branch 'perf/probes' into perf/core
author Ingo Molnar <mingo@elte.hu>
Thu, 3 Dec 2009 19:11:37 +0000 (20:11 +0100)
committer Ingo Molnar <mingo@elte.hu>
Thu, 3 Dec 2009 19:11:38 +0000 (20:11 +0100)
Merge reason: add these fixes to 'perf probe'.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
149 files changed:
Documentation/DocBook/tracepoint.tmpl
arch/Kconfig
arch/x86/Kconfig
arch/x86/include/asm/Kbuild
arch/x86/include/asm/a.out-core.h
arch/x86/include/asm/debugreg.h
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/hw_breakpoint.h [new file with mode: 0644]
arch/x86/include/asm/mce.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/ptrace.h
arch/x86/kernel/Makefile
arch/x86/kernel/cpu/Makefile
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/hw_breakpoint.c [new file with mode: 0644]
arch/x86/kernel/irq.c
arch/x86/kernel/kgdb.c
arch/x86/kernel/kprobes.c
arch/x86/kernel/machine_kexec_32.c
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/setup.c
arch/x86/kernel/signal.c
arch/x86/kernel/traps.c
arch/x86/kvm/x86.c
arch/x86/mm/kmmio.c
arch/x86/power/cpu.c
arch/x86/tools/test_get_len.c
drivers/edac/edac_mce_amd.c
include/linux/ftrace_event.h
include/linux/hw_breakpoint.h [new file with mode: 0644]
include/linux/perf_event.h
include/linux/syscalls.h
include/linux/tracepoint.h
include/trace/define_trace.h
include/trace/events/block.h
include/trace/events/ext4.h
include/trace/events/irq.h
include/trace/events/jbd2.h
include/trace/events/kmem.h
include/trace/events/mce.h [new file with mode: 0644]
include/trace/events/module.h
include/trace/events/power.h
include/trace/events/sched.h
include/trace/events/signal.h [new file with mode: 0644]
include/trace/events/workqueue.h
include/trace/ftrace.h
include/trace/syscall.h
kernel/Makefile
kernel/exit.c
kernel/hw_breakpoint.c [new file with mode: 0644]
kernel/kallsyms.c
kernel/perf_event.c
kernel/signal.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/trace.h
kernel/trace/trace_entries.h
kernel/trace/trace_event_profile.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_ksym.c [new file with mode: 0644]
kernel/trace/trace_selftest.c
kernel/trace/trace_syscalls.c
samples/Kconfig
samples/Makefile
samples/hw_breakpoint/Makefile [new file with mode: 0644]
samples/hw_breakpoint/data_breakpoint.c [new file with mode: 0644]
scripts/kernel-doc
tools/perf/.gitignore
tools/perf/Documentation/perf-kmem.txt [new file with mode: 0644]
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-trace-perl.txt [new file with mode: 0644]
tools/perf/Documentation/perf-trace.txt
tools/perf/Makefile
tools/perf/bench/bench.h
tools/perf/bench/mem-memcpy.c [new file with mode: 0644]
tools/perf/builtin-annotate.c
tools/perf/builtin-bench.c
tools/perf/builtin-help.c
tools/perf/builtin-kmem.c [new file with mode: 0644]
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/builtin.h
tools/perf/command-list.txt
tools/perf/perf.c
tools/perf/perf.h
tools/perf/scripts/perl/Perf-Trace-Util/Context.c [new file with mode: 0644]
tools/perf/scripts/perl/Perf-Trace-Util/Context.xs [new file with mode: 0644]
tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL [new file with mode: 0644]
tools/perf/scripts/perl/Perf-Trace-Util/README [new file with mode: 0644]
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm [new file with mode: 0644]
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm [new file with mode: 0644]
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm [new file with mode: 0644]
tools/perf/scripts/perl/Perf-Trace-Util/typemap [new file with mode: 0644]
tools/perf/scripts/perl/bin/check-perf-trace-record [new file with mode: 0644]
tools/perf/scripts/perl/bin/check-perf-trace-report [new file with mode: 0644]
tools/perf/scripts/perl/bin/rw-by-file-record [new file with mode: 0644]
tools/perf/scripts/perl/bin/rw-by-file-report [new file with mode: 0644]
tools/perf/scripts/perl/bin/rw-by-pid-record [new file with mode: 0644]
tools/perf/scripts/perl/bin/rw-by-pid-report [new file with mode: 0644]
tools/perf/scripts/perl/bin/wakeup-latency-record [new file with mode: 0644]
tools/perf/scripts/perl/bin/wakeup-latency-report [new file with mode: 0644]
tools/perf/scripts/perl/bin/workqueue-stats-record [new file with mode: 0644]
tools/perf/scripts/perl/bin/workqueue-stats-report [new file with mode: 0644]
tools/perf/scripts/perl/check-perf-trace.pl [new file with mode: 0644]
tools/perf/scripts/perl/rw-by-file.pl [new file with mode: 0644]
tools/perf/scripts/perl/rw-by-pid.pl [new file with mode: 0644]
tools/perf/scripts/perl/wakeup-latency.pl [new file with mode: 0644]
tools/perf/scripts/perl/workqueue-stats.pl [new file with mode: 0644]
tools/perf/util/ctype.c
tools/perf/util/data_map.c
tools/perf/util/data_map.h
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/include/asm/bug.h [new file with mode: 0644]
tools/perf/util/include/linux/bitops.h
tools/perf/util/map.c
tools/perf/util/parse-events.c
tools/perf/util/probe-event.c [new file with mode: 0644]
tools/perf/util/probe-event.h [new file with mode: 0644]
tools/perf/util/string.c
tools/perf/util/string.h
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/trace-event-info.c
tools/perf/util/trace-event-parse.c
tools/perf/util/trace-event-perl.c [new file with mode: 0644]
tools/perf/util/trace-event-perl.h [new file with mode: 0644]
tools/perf/util/trace-event-read.c
tools/perf/util/trace-event.h
tools/perf/util/util.h
tools/perf/util/wrapper.c

index b0756d0fd57910a44d2f2d29fdb4edf87dac21ca..8bca1d5cec09a8bf6c8c5f0e6f159d770fcf806c 100644 (file)
@@ -86,4 +86,9 @@
 !Iinclude/trace/events/irq.h
   </chapter>
 
+  <chapter id="signal">
+   <title>SIGNAL</title>
+!Iinclude/trace/events/signal.h
+  </chapter>
+
 </book>
index 7f418bbc261a0f825ab6072f935f1f974a2f746c..eef3bbb970753c1d840cb9c1520147850dda6800 100644 (file)
@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
 config HAVE_DEFAULT_NO_SPIN_MUTEXES
        bool
 
+config HAVE_HW_BREAKPOINT
+       bool
+       depends on HAVE_PERF_EVENTS
+       select ANON_INODES
+       select PERF_EVENTS
+
+
 source "kernel/gcov/Kconfig"
index 72ace9515a07a44525778899e1ea04b32b3accbc..178084b4377ccbebb2fb089ea1c17742f9addbfe 100644 (file)
@@ -49,6 +49,7 @@ config X86
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_LZMA
+       select HAVE_HW_BREAKPOINT
        select HAVE_ARCH_KMEMCHECK
 
 config OUTPUT_FORMAT
index 4a8e80cdcfa57a7faff08a2042a6b6fb64f5ae66..9f828f87ca35f418d24d4b7674477f643eea773d 100644 (file)
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
 header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
+header-y += hw_breakpoint.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
index bb70e397aa84c0e7cfa452c1efa9298b8a58c985..7a15588e45d47265391ec508c787d98b374aaeb9 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/user.h>
 #include <linux/elfcore.h>
+#include <asm/debugreg.h>
 
 /*
  * fill in the user structure for an a.out core dump
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
                        >> PAGE_SHIFT;
        dump->u_dsize -= dump->u_tsize;
        dump->u_ssize = 0;
-       dump->u_debugreg[0] = current->thread.debugreg0;
-       dump->u_debugreg[1] = current->thread.debugreg1;
-       dump->u_debugreg[2] = current->thread.debugreg2;
-       dump->u_debugreg[3] = current->thread.debugreg3;
-       dump->u_debugreg[4] = 0;
-       dump->u_debugreg[5] = 0;
-       dump->u_debugreg[6] = current->thread.debugreg6;
-       dump->u_debugreg[7] = current->thread.debugreg7;
+       aout_dump_debugregs(dump);
 
        if (dump->start_stack < TASK_SIZE)
                dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
index 3ea6f37be9e2d29a69f6982bb3ddcc80554f1652..8240f76b531e0959be5a4fa823b1820d5d5952b5 100644 (file)
@@ -18,6 +18,7 @@
 #define DR_TRAP1       (0x2)           /* db1 */
 #define DR_TRAP2       (0x4)           /* db2 */
 #define DR_TRAP3       (0x8)           /* db3 */
+#define DR_TRAP_BITS   (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
 
 #define DR_STEP                (0x4000)        /* single-step */
 #define DR_SWITCH      (0x8000)        /* task switch */
@@ -49,6 +50,8 @@
 
 #define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
 #define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1)      /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2)     /* Global enable for reg 0 */
 #define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
 
 #define DR_LOCAL_ENABLE_MASK (0x55)  /* Set  local bits for all 4 regs */
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+DECLARE_PER_CPU(unsigned long, cpu_dr7);
+
+static inline void hw_breakpoint_disable(void)
+{
+       /* Zero the control register for HW Breakpoint */
+       set_debugreg(0UL, 7);
+
+       /* Zero-out the individual HW breakpoint address registers */
+       set_debugreg(0UL, 0);
+       set_debugreg(0UL, 1);
+       set_debugreg(0UL, 2);
+       set_debugreg(0UL, 3);
+}
+
+static inline int hw_breakpoint_active(void)
+{
+       return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
+}
+
+extern void aout_dump_debugregs(struct user *dump);
+
+extern void hw_breakpoint_restore(void);
+
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_X86_DEBUGREG_H */
index 82e3e8f010439cde125a3a75c4f0d65a7068b573..108eb6fd1ae7f6da8c94ca72197fdf7f4e3ec74d 100644 (file)
@@ -20,11 +20,11 @@ typedef struct {
        unsigned int irq_call_count;
        unsigned int irq_tlb_count;
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
        unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
        unsigned int irq_threshold_count;
-# endif
 #endif
 } ____cacheline_aligned irq_cpustat_t;
 
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644 (file)
index 0000000..0675a7c
--- /dev/null
@@ -0,0 +1,73 @@
+#ifndef        _I386_HW_BREAKPOINT_H
+#define        _I386_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+#define        __ARCH_HW_BREAKPOINT_H
+
+/*
+ * The name should probably be dealt with at a
+ * higher level, while dealing with the user
+ * (display/resolving).
+ */
+struct arch_hw_breakpoint {
+       char            *name; /* Contains name of the symbol to set bkpt */
+       unsigned long   address;
+       u8              len;
+       u8              type;
+};
+
+#include <linux/kdebug.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
+
+/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_1           0x40
+#define X86_BREAKPOINT_LEN_2           0x44
+#define X86_BREAKPOINT_LEN_4           0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE     0x40
+
+#ifdef CONFIG_X86_64
+#define X86_BREAKPOINT_LEN_8           0x48
+#endif
+
+/* Available HW breakpoint type encodings */
+
+/* trigger on instruction execute */
+#define X86_BREAKPOINT_EXECUTE 0x80
+/* trigger on memory write */
+#define X86_BREAKPOINT_WRITE   0x81
+/* trigger on memory read or write */
+#define X86_BREAKPOINT_RW      0x83
+
+/* Total number of available HW breakpoint registers */
+#define HBP_NUM 4
+
+struct perf_event;
+struct pmu;
+
+extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+                                        struct task_struct *tsk);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+                                          unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+                                 int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
+#endif /* __KERNEL__ */
+#endif /* _I386_HW_BREAKPOINT_H */
+
index f1363b72364f3e2a53609e77e52379f7f3998b3a..858baa061cfce365a51e48bf162f92cb00a8e8ba 100644 (file)
@@ -108,6 +108,8 @@ struct mce_log {
 #define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
 #define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
 
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
 #ifdef __KERNEL__
 
 #include <linux/percpu.h>
@@ -118,9 +120,11 @@ extern int mce_disabled;
 extern int mce_p5_enabled;
 
 #ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
+int mcheck_init(void);
+void mcheck_cpu_init(struct cpuinfo_x86 *c);
 #else
-static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+static inline int mcheck_init(void) { return 0; }
+static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
@@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
 
 void mce_log_therm_throt_event(__u64 status);
 
+#ifdef CONFIG_X86_THERMAL_VECTOR
+extern void mcheck_intel_therm_init(void);
+#else
+static inline void mcheck_intel_therm_init(void) { }
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
index c9786480f0fe4d074e9575557316d7f4b358be4b..6f8ec1c37e0a8c999f9344082465fd2c9b4570f1 100644 (file)
@@ -30,6 +30,7 @@ struct mm_struct;
 #include <linux/math64.h>
 #include <linux/init.h>
 
+#define HBP_NUM 4
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -422,6 +423,8 @@ extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
        /* Cached TLS descriptors: */
        struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -443,13 +446,10 @@ struct thread_struct {
        unsigned long           fs;
 #endif
        unsigned long           gs;
-       /* Hardware debugging registers: */
-       unsigned long           debugreg0;
-       unsigned long           debugreg1;
-       unsigned long           debugreg2;
-       unsigned long           debugreg3;
-       unsigned long           debugreg6;
-       unsigned long           debugreg7;
+       /* Save middle states of ptrace breakpoints */
+       struct perf_event       *ptrace_bps[HBP_NUM];
+       /* Debug status used for traps, single steps, etc... */
+       unsigned long           debugreg6;
        /* Fault info: */
        unsigned long           cr2;
        unsigned long           trap_no;
index a3d49dd7d26e1765d6345bf03a861bcdd91238a5..3d11fd0f44c5f4f86c060207417710db58758094 100644 (file)
@@ -227,8 +227,8 @@ extern const char *regs_query_register_name(unsigned int offset);
  * @regs:      pt_regs from which register value is gotten.
  * @offset:    offset number of the register.
  *
- * regs_get_register returns the value of a register whose offset from @regs
- * is @offset. The @offset is the offset of the register in struct pt_regs.
+ * regs_get_register returns the value of a register. The @offset is the
+ * offset of the register within the struct pt_regs specified by @regs.
  * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
  */
 static inline unsigned long regs_get_register(struct pt_regs *regs,
@@ -244,7 +244,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
  * @regs:      pt_regs which contains kernel stack pointer.
  * @addr:      address which is checked.
  *
- * regs_within_kenel_stack() checks @addr is within the kernel stack page(s).
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
  * If @addr is within the kernel stack, it returns true. If not, returns false.
  */
 static inline int regs_within_kernel_stack(struct pt_regs *regs,
@@ -260,7 +260,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
  * @n:         stack entry number.
  *
  * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
  * this returns 0.
  */
 static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
index d8e5d0cdd678d3b4396c0e7f859b7c3f6ac0d212..4f2e66e29ecc5cc35e749f18e194b2ad11f398d2 100644 (file)
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64)  += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)   += syscall_64.o vsyscall_64.o
 obj-y                  += bootflag.o e820.o
 obj-y                  += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y                  += alternative.o i8253.o pci-nommu.o
+obj-y                  += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y                  += tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline.o
index 68537e957a9b2cd621185e275103c201af9502e6..1d2cb383410ebef206fb48e2ccecbe8fd03f90ce 100644 (file)
@@ -5,6 +5,7 @@
 # Don't trace early stages of a secondary CPU boot
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_common.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
 # Make sure load_percpu_segment has no stackprotector
index cc25c2b4a567c2ca3e020127cefe87b2778f02ee..9053be5d95cd4fb21f4aae94f2e7335af6a6f83a 100644 (file)
@@ -837,10 +837,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
                        boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
        }
 
-#ifdef CONFIG_X86_MCE
        /* Init Machine Check Exception if available. */
-       mcheck_init(c);
-#endif
+       mcheck_cpu_init(c);
 
        select_idle_routine(c);
 
index 721a77ca811536eb2129e02449701948fe9aa9c3..0bcaa3875863aaefa98942e00d93ae5be0fecdb6 100644 (file)
@@ -46,6 +46,9 @@
 
 #include "mce-internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/mce.h>
+
 int mce_disabled __read_mostly;
 
 #define MISC_MCELOG_MINOR      227
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int                     cpu_missing;
 
-static void default_decode_mce(struct mce *m)
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
+
+static int default_decode_mce(struct notifier_block *nb, unsigned long val,
+                              void *data)
 {
        pr_emerg("No human readable MCE decoding support on this CPU type.\n");
        pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+
+       return NOTIFY_STOP;
 }
 
-/*
- * CPU/chipset specific EDAC code can register a callback here to print
- * MCE errors in a human-readable form:
- */
-void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
-EXPORT_SYMBOL(x86_mce_decode_callback);
+static struct notifier_block mce_dec_nb = {
+       .notifier_call = default_decode_mce,
+       .priority      = -1,
+};
 
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce)
 {
        unsigned next, entry;
 
+       /* Emit the trace record: */
+       trace_mce_record(mce);
+
        mce->finished = 0;
        wmb();
        for (;;) {
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m)
 
        /*
         * Print out human-readable details about the MCE error,
-        * (if the CPU has an implementation for that):
+        * (if the CPU has an implementation for that)
         */
-       x86_mce_decode_callback(m);
+       atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
 }
 
 static void print_mce_head(void)
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
-static void mcheck_timer(unsigned long data)
+static void mce_start_timer(unsigned long data)
 {
        struct timer_list *t = &per_cpu(mce_timer, data);
        int *n;
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void)
 }
 EXPORT_SYMBOL_GPL(mce_notify_irq);
 
-static int mce_banks_init(void)
+static int __cpuinit __mcheck_cpu_mce_banks_init(void)
 {
        int i;
 
@@ -1206,7 +1220,7 @@ static int mce_banks_init(void)
 /*
  * Initialize Machine Checks for a CPU.
  */
-static int __cpuinit mce_cap_init(void)
+static int __cpuinit __mcheck_cpu_cap_init(void)
 {
        unsigned b;
        u64 cap;
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void)
        WARN_ON(banks != 0 && b != banks);
        banks = b;
        if (!mce_banks) {
-               int err = mce_banks_init();
+               int err = __mcheck_cpu_mce_banks_init();
 
                if (err)
                        return err;
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void)
        return 0;
 }
 
-static void mce_init(void)
+static void __mcheck_cpu_init_generic(void)
 {
        mce_banks_t all_banks;
        u64 cap;
@@ -1273,7 +1287,7 @@ static void mce_init(void)
 }
 
 /* Add per CPU specific workarounds here */
-static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
        if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
                pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
        return 0;
 }
 
-static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 {
        if (c->x86 != 5)
                return;
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
        }
 }
 
-static void mce_cpu_features(struct cpuinfo_x86 *c)
+static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 {
        switch (c->x86_vendor) {
        case X86_VENDOR_INTEL:
@@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
        }
 }
 
-static void mce_init_timer(void)
+static void __mcheck_cpu_init_timer(void)
 {
        struct timer_list *t = &__get_cpu_var(mce_timer);
        int *n = &__get_cpu_var(mce_next_interval);
@@ -1380,7 +1394,7 @@ static void mce_init_timer(void)
        *n = check_interval * HZ;
        if (!*n)
                return;
-       setup_timer(t, mcheck_timer, smp_processor_id());
+       setup_timer(t, mce_start_timer, smp_processor_id());
        t->expires = round_jiffies(jiffies + *n);
        add_timer_on(t, smp_processor_id());
 }
@@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off:
  */
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
 {
        if (mce_disabled)
                return;
 
-       mce_ancient_init(c);
+       __mcheck_cpu_ancient_init(c);
 
        if (!mce_available(c))
                return;
 
-       if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
+       if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
                mce_disabled = 1;
                return;
        }
 
        machine_check_vector = do_machine_check;
 
-       mce_init();
-       mce_cpu_features(c);
-       mce_init_timer();
+       __mcheck_cpu_init_generic();
+       __mcheck_cpu_init_vendor(c);
+       __mcheck_cpu_init_timer();
        INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+
 }
 
 /*
@@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str)
 }
 __setup("mce", mcheck_enable);
 
+int __init mcheck_init(void)
+{
+       atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
+
+       mcheck_intel_therm_init();
+
+       return 0;
+}
+
 /*
  * Sysfs support
  */
@@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable);
  * Disable machine checks on suspend and shutdown. We can't really handle
  * them later.
  */
-static int mce_disable(void)
+static int mce_disable_error_reporting(void)
 {
        int i;
 
@@ -1663,12 +1687,12 @@ static int mce_disable(void)
 
 static int mce_suspend(struct sys_device *dev, pm_message_t state)
 {
-       return mce_disable();
+       return mce_disable_error_reporting();
 }
 
 static int mce_shutdown(struct sys_device *dev)
 {
-       return mce_disable();
+       return mce_disable_error_reporting();
 }
 
 /*
@@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev)
  */
 static int mce_resume(struct sys_device *dev)
 {
-       mce_init();
-       mce_cpu_features(&current_cpu_data);
+       __mcheck_cpu_init_generic();
+       __mcheck_cpu_init_vendor(&current_cpu_data);
 
        return 0;
 }
@@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data)
        del_timer_sync(&__get_cpu_var(mce_timer));
        if (!mce_available(&current_cpu_data))
                return;
-       mce_init();
-       mce_init_timer();
+       __mcheck_cpu_init_generic();
+       __mcheck_cpu_init_timer();
 }
 
 /* Reinit MCEs after user configuration changes */
@@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all)
        cmci_reenable();
        cmci_recheck();
        if (all)
-               mce_init_timer();
+               __mcheck_cpu_init_timer();
 }
 
 static struct sysdev_class mce_sysclass = {
@@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
 }
 
 /* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
+static void __cpuinit mce_disable_cpu(void *h)
 {
        unsigned long action = *(unsigned long *)h;
        int i;
 
        if (!mce_available(&current_cpu_data))
                return;
+
        if (!(action & CPU_TASKS_FROZEN))
                cmci_clear();
        for (i = 0; i < banks; i++) {
@@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h)
        }
 }
 
-static void mce_reenable_cpu(void *h)
+static void __cpuinit mce_reenable_cpu(void *h)
 {
        unsigned long action = *(unsigned long *)h;
        int i;
@@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void)
        }
 }
 
-static __init int mce_init_device(void)
+static __init int mcheck_init_device(void)
 {
        int err;
        int i = 0;
@@ -2053,7 +2078,7 @@ static __init int mce_init_device(void)
        return err;
 }
 
-device_initcall(mce_init_device);
+device_initcall(mcheck_init_device);
 
 /*
  * Old style boot options parsing. Only for compatibility.
@@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val)
 DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
                        fake_panic_set, "%llu\n");
 
-static int __init mce_debugfs_init(void)
+static int __init mcheck_debugfs_init(void)
 {
        struct dentry *dmce, *ffake_panic;
 
@@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void)
 
        return 0;
 }
-late_initcall(mce_debugfs_init);
+late_initcall(mcheck_debugfs_init);
 #endif
index b3a1dba75330a4891b7d49739bd8507076a642da..4fef985fc221622623473c25e9abadda053095c9 100644 (file)
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state);
 
 static atomic_t therm_throt_en = ATOMIC_INIT(0);
 
+static u32 lvtthmr_init __read_mostly;
+
 #ifdef CONFIG_SYSFS
 #define define_therm_throt_sysdev_one_ro(_name)                                \
        static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
        ack_APIC_irq();
 }
 
+void __init mcheck_intel_therm_init(void)
+{
+       /*
+        * This function is only called on the boot CPU. Save the initial
+        * thermal LVT value of the BSP so that it can be used later to
+        * restore the BIOS-programmed thermal LVT entry on the APs.
+        */
+       if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
+               cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
+               lvtthmr_init = apic_read(APIC_LVTTHMR);
+}
+
 void intel_init_thermal(struct cpuinfo_x86 *c)
 {
        unsigned int cpu = smp_processor_id();
@@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
         * since it might be delivered via SMI already:
         */
        rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-       h = apic_read(APIC_LVTTHMR);
+
+       /*
+        * The initial value of thermal LVT entries on all APs always reads
+        * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
+        * sequence to them and LVT registers are reset to 0s except for
+        * the mask bits which are set to 1s when APs receive INIT IPI.
+        * Always restore the value that BIOS has programmed on AP based on
+        * BSP's info we saved since BIOS is always setting the same value
+        * for all threads/cores
+        */
+       apic_write(APIC_LVTTHMR, lvtthmr_init);
+
+       h = lvtthmr_init;
+
        if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
                printk(KERN_DEBUG
                       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
index bd8743024204bd126b31b8cfd39f85892a399bf8..c1bbed1021d96c63e96593f1687d50bc7716b22e 100644 (file)
@@ -2229,10 +2229,10 @@ validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
        struct hw_perf_event fake_event = event->hw;
 
-       if (event->pmu != &pmu)
+       if (event->pmu && event->pmu != &pmu)
                return 0;
 
-       return x86_schedule_event(cpuc, &fake_event);
+       return x86_schedule_event(cpuc, &fake_event) >= 0;
 }
 
 static int validate_group(struct perf_event *event)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644 (file)
index 0000000..d42f65a
--- /dev/null
@@ -0,0 +1,555 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Authors: Alan Stern <stern@rowland.harvard.edu>
+ *          K.Prasad <prasad@linux.vnet.ibm.com>
+ *          Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/debugreg.h>
+
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, cpu_dr7);
+EXPORT_PER_CPU_SYMBOL(cpu_dr7);
+
+/* Per cpu debug address registers values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
+
+/*
+ * Stores the breakpoint currently installed in each breakpoint address
+ * register, for each cpu
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
+
+
+static inline unsigned long
+__encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+       unsigned long bp_info;
+
+       bp_info = (len | type) & 0xf;
+       bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
+       bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
+
+       return bp_info;
+}
+
+/*
+ * Encode the length, type, Exact, and Enable bits for a particular breakpoint
+ * as stored in debug register 7.
+ */
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+       return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
+}
+
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7.  Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
+{
+       int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+       *len = (bp_info & 0xc) | 0x40;
+       *type = (bp_info & 0x3) | 0x80;
+
+       return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+       unsigned long *dr7;
+       int i;
+
+       for (i = 0; i < HBP_NUM; i++) {
+               struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+               if (!*slot) {
+                       *slot = bp;
+                       break;
+               }
+       }
+
+       if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+               return -EBUSY;
+
+       set_debugreg(info->address, i);
+       __get_cpu_var(cpu_debugreg[i]) = info->address;
+
+       dr7 = &__get_cpu_var(cpu_dr7);
+       *dr7 |= encode_dr7(i, info->len, info->type);
+
+       set_debugreg(*dr7, 7);
+
+       return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+       unsigned long *dr7;
+       int i;
+
+       for (i = 0; i < HBP_NUM; i++) {
+               struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+               if (*slot == bp) {
+                       *slot = NULL;
+                       break;
+               }
+       }
+
+       if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+               return;
+
+       dr7 = &__get_cpu_var(cpu_dr7);
+       *dr7 &= ~__encode_dr7(i, info->len, info->type);
+
+       set_debugreg(*dr7, 7);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+       unsigned int len_in_bytes = 0;
+
+       switch (hbp_len) {
+       case X86_BREAKPOINT_LEN_1:
+               len_in_bytes = 1;
+               break;
+       case X86_BREAKPOINT_LEN_2:
+               len_in_bytes = 2;
+               break;
+       case X86_BREAKPOINT_LEN_4:
+               len_in_bytes = 4;
+               break;
+#ifdef CONFIG_X86_64
+       case X86_BREAKPOINT_LEN_8:
+               len_in_bytes = 8;
+               break;
+#endif
+       }
+       return len_in_bytes;
+}
+
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+       unsigned int len;
+
+       len = get_hbp_len(hbp_len);
+
+       return (va <= TASK_SIZE - len);
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+{
+       unsigned int len;
+
+       len = get_hbp_len(hbp_len);
+
+       return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ */
+static int arch_store_info(struct perf_event *bp)
+{
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+       /*
+        * For kernel-addresses, either the address or symbol name can be
+        * specified.
+        */
+       if (info->name)
+               info->address = (unsigned long)
+                               kallsyms_lookup_name(info->name);
+       if (info->address)
+               return 0;
+
+       return -EINVAL;
+}
+
+int arch_bp_generic_fields(int x86_len, int x86_type,
+                          int *gen_len, int *gen_type)
+{
+       /* Len */
+       switch (x86_len) {
+       case X86_BREAKPOINT_LEN_1:
+               *gen_len = HW_BREAKPOINT_LEN_1;
+               break;
+       case X86_BREAKPOINT_LEN_2:
+               *gen_len = HW_BREAKPOINT_LEN_2;
+               break;
+       case X86_BREAKPOINT_LEN_4:
+               *gen_len = HW_BREAKPOINT_LEN_4;
+               break;
+#ifdef CONFIG_X86_64
+       case X86_BREAKPOINT_LEN_8:
+               *gen_len = HW_BREAKPOINT_LEN_8;
+               break;
+#endif
+       default:
+               return -EINVAL;
+       }
+
+       /* Type */
+       switch (x86_type) {
+       case X86_BREAKPOINT_EXECUTE:
+               *gen_type = HW_BREAKPOINT_X;
+               break;
+       case X86_BREAKPOINT_WRITE:
+               *gen_type = HW_BREAKPOINT_W;
+               break;
+       case X86_BREAKPOINT_RW:
+               *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+       info->address = bp->attr.bp_addr;
+
+       /* Len */
+       switch (bp->attr.bp_len) {
+       case HW_BREAKPOINT_LEN_1:
+               info->len = X86_BREAKPOINT_LEN_1;
+               break;
+       case HW_BREAKPOINT_LEN_2:
+               info->len = X86_BREAKPOINT_LEN_2;
+               break;
+       case HW_BREAKPOINT_LEN_4:
+               info->len = X86_BREAKPOINT_LEN_4;
+               break;
+#ifdef CONFIG_X86_64
+       case HW_BREAKPOINT_LEN_8:
+               info->len = X86_BREAKPOINT_LEN_8;
+               break;
+#endif
+       default:
+               return -EINVAL;
+       }
+
+       /* Type */
+       switch (bp->attr.bp_type) {
+       case HW_BREAKPOINT_W:
+               info->type = X86_BREAKPOINT_WRITE;
+               break;
+       case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+               info->type = X86_BREAKPOINT_RW;
+               break;
+       case HW_BREAKPOINT_X:
+               info->type = X86_BREAKPOINT_EXECUTE;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+                                 struct task_struct *tsk)
+{
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+       unsigned int align;
+       int ret;
+
+
+       ret = arch_build_bp_info(bp);
+       if (ret)
+               return ret;
+
+       ret = -EINVAL;
+
+       if (info->type == X86_BREAKPOINT_EXECUTE)
+               /*
+                * Ptrace-refactoring code: for now, instruction breakpoints are
+                * only allowed on user-space addresses. NOTE(review): the test
+                * below compares info->len to a *type* constant - confirm intent.
+                */
+               if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+                       info->len != X86_BREAKPOINT_EXECUTE)
+                       return ret;
+
+       switch (info->len) {
+       case X86_BREAKPOINT_LEN_1:
+               align = 0;
+               break;
+       case X86_BREAKPOINT_LEN_2:
+               align = 1;
+               break;
+       case X86_BREAKPOINT_LEN_4:
+               align = 3;
+               break;
+#ifdef CONFIG_X86_64
+       case X86_BREAKPOINT_LEN_8:
+               align = 7;
+               break;
+#endif
+       default:
+               return ret;
+       }
+
+       if (bp->callback)
+               ret = arch_store_info(bp);
+
+       if (ret < 0)
+               return ret;
+       /*
+        * Check that the low-order bits of the address are appropriate
+        * for the alignment implied by len.
+        */
+       if (info->address & align)
+               return -EINVAL;
+
+       /* Check that the virtual address is in the proper range */
+       if (tsk) {
+               if (!arch_check_va_in_userspace(info->address, info->len))
+                       return -EFAULT;
+       } else {
+               if (!arch_check_va_in_kernelspace(info->address, info->len))
+                       return -EFAULT;
+       }
+
+       return 0;
+}
+
+/*
+ * Dump the debug register contents to the user.
+ * We can't dump our per cpu values because it
+ * may contain cpu wide breakpoint, something that
+ * doesn't belong to the current task.
+ *
+ * TODO: include non-ptrace user breakpoints (perf)
+ */
+void aout_dump_debugregs(struct user *dump)
+{
+       int i;
+       unsigned long dr7 = 0;  /* same width as encode_dr7()'s result */
+       struct perf_event *bp;
+       struct arch_hw_breakpoint *info;
+       struct thread_struct *thread = &current->thread;
+
+       for (i = 0; i < HBP_NUM; i++) {
+               bp = thread->ptrace_bps[i];
+
+               if (bp && !bp->attr.disabled) {
+                       dump->u_debugreg[i] = bp->attr.bp_addr;
+                       info = counter_arch_bp(bp);
+                       dr7 |= encode_dr7(i, info->len, info->type);
+               } else {
+                       dump->u_debugreg[i] = 0;
+               }
+       }
+
+       dump->u_debugreg[4] = 0;
+       dump->u_debugreg[5] = 0;
+       dump->u_debugreg[6] = current->thread.debugreg6;
+
+       dump->u_debugreg[7] = dr7;
+}
+EXPORT_SYMBOL_GPL(aout_dump_debugregs);
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+       int i;
+       struct thread_struct *t = &tsk->thread;
+
+       for (i = 0; i < HBP_NUM; i++) {
+               unregister_hw_breakpoint(t->ptrace_bps[i]);
+               t->ptrace_bps[i] = NULL;
+       }
+}
+
+void hw_breakpoint_restore(void)
+{
+       set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+       set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+       set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+       set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+       set_debugreg(current->thread.debugreg6, 6);
+       set_debugreg(__get_cpu_var(cpu_dr7), 7);
+}
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+
+/*
+ * Handle debug exception notifications.
+ *
+ * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
+ *
+ * NOTIFY_DONE returned if one of the following conditions is true.
+ * i) When the causative address is from user-space and the exception
+ * is a valid one, i.e. not triggered as a result of lazy debug register
+ * switching
+ * ii) When there are more bits than trap<n> set in DR6 register (such
+ * as BD, BS or BT) indicating that more than one debug condition is
+ * met and requires some more action in do_debug().
+ *
+ * NOTIFY_STOP returned for all other cases
+ *
+ */
+static int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+       int i, cpu, rc = NOTIFY_STOP;
+       struct perf_event *bp;
+       unsigned long dr7, dr6;
+       unsigned long *dr6_p;
+
+       /* The DR6 value is pointed by args->err */
+       dr6_p = (unsigned long *)ERR_PTR(args->err);
+       dr6 = *dr6_p;
+
+       /* Do an early return if no trap bits are set in DR6 */
+       if ((dr6 & DR_TRAP_BITS) == 0)
+               return NOTIFY_DONE;
+
+       get_debugreg(dr7, 7);
+       /* Disable breakpoints during exception handling */
+       set_debugreg(0UL, 7);
+       /*
+        * Assert that local interrupts are disabled
+        * Reset the DRn bits in the virtualized register value.
+        * The ptrace trigger routine will add in whatever is needed.
+        */
+       current->thread.debugreg6 &= ~DR_TRAP_BITS;
+       cpu = get_cpu();
+
+       /* Handle all the breakpoints that were triggered */
+       for (i = 0; i < HBP_NUM; ++i) {
+               if (likely(!(dr6 & (DR_TRAP0 << i))))
+                       continue;
+
+               /*
+                * The counter may be concurrently released but that can only
+                * occur from a call_rcu() path. We can then safely fetch
+                * the breakpoint, use its callback, touch its counter
+                * while we are in an rcu_read_lock() path.
+                */
+               rcu_read_lock();
+
+               bp = per_cpu(bp_per_reg[i], cpu);
+               if (bp)
+                       rc = NOTIFY_DONE;
+               /*
+                * Reset the 'i'th TRAP bit in dr6 to denote completion of
+                * exception handling
+                */
+               (*dr6_p) &= ~(DR_TRAP0 << i);
+               /*
+                * bp can be NULL due to lazy debug register switching
+                * or due to concurrent perf counter removing.
+                */
+               if (!bp) {
+                       rcu_read_unlock();
+                       break;
+               }
+
+               (bp->callback)(bp, args->regs);
+
+               rcu_read_unlock();
+       }
+       if (dr6 & (~DR_TRAP_BITS))
+               rc = NOTIFY_DONE;
+
+       set_debugreg(dr7, 7);
+       put_cpu();
+
+       return rc;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+               struct notifier_block *unused, unsigned long val, void *data)
+{
+       if (val != DIE_DEBUG)
+               return NOTIFY_DONE;
+
+       return hw_breakpoint_handler(data);
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+       /* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+       /* TODO */
+}
index 04bbd52785688c4152c449459d943a0482c68830..19212cb01558101c5fd861fc838e83230e94e15c 100644 (file)
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec)
                seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
        seq_printf(p, "  TLB shootdowns\n");
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
        seq_printf(p, "%*s: ", prec, "TRM");
        for_each_online_cpu(j)
                seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
        seq_printf(p, "  Thermal event interrupts\n");
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
        seq_printf(p, "%*s: ", prec, "THR");
        for_each_online_cpu(j)
                seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
        seq_printf(p, "  Threshold APIC interrupts\n");
-# endif
 #endif
 #ifdef CONFIG_X86_MCE
        seq_printf(p, "%*s: ", prec, "MCE");
@@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
        sum += irq_stats(cpu)->irq_call_count;
        sum += irq_stats(cpu)->irq_tlb_count;
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
        sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
        sum += irq_stats(cpu)->irq_threshold_count;
-# endif
 #endif
 #ifdef CONFIG_X86_MCE
        sum += per_cpu(mce_exception_count, cpu);
index 8d82a77a3f3b96ea3c0dc37e91551dddc7e10b51..34e86b67550c523d3bbefb0275ede78d526b0826 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/smp.h>
 #include <linux/nmi.h>
 
+#include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/system.h>
 
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
                        "resuming...\n");
        kgdb_arch_handle_exception(args->trapnr, args->signr,
                                   args->err, "c", "", regs);
+       /*
+        * Reset the BS bit in dr6 (pointed to by args->err) to
+        * denote completion of processing
+        */
+       (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 
        return NOTIFY_STOP;
 }
index c5f1f117e0c0577a527e788d4c80220bd49dd84d..3fe86d706a1493ad59cb655478bd9b41cfd11f4d 100644 (file)
@@ -56,6 +56,7 @@
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/insn.h>
+#include <asm/debugreg.h>
 
 void jprobe_return_end(void);
 
@@ -945,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
                        ret = NOTIFY_STOP;
                break;
        case DIE_DEBUG:
-               if (post_kprobe_handler(args->regs))
+               if (post_kprobe_handler(args->regs)) {
+                       /*
+                        * Reset the BS bit in dr6 (pointed to by args->err) to
+                        * denote completion of processing
+                        */
+                       (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
                        ret = NOTIFY_STOP;
+               }
                break;
        case DIE_GPF:
                /*
index c1c429d00130c2a233b35f69449d5a02a9f6f2e9..c843f8406da2b95f416cc9a83c2bead2d4d17ae6 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/desc.h>
 #include <asm/system.h>
 #include <asm/cacheflush.h>
+#include <asm/debugreg.h>
 
 static void set_idt(void *newidt, __u16 limit)
 {
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
 
        /* Interrupts aren't acceptable while we reboot */
        local_irq_disable();
+       hw_breakpoint_disable();
 
        if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
index 84c3bf209e98a390ff536e46fd8fec362479f22a..4a8bb82248ae8a9a945854105c19447fec8a6530 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/debugreg.h>
 
 static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
                                unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
 
        /* Interrupts aren't acceptable while we reboot */
        local_irq_disable();
+       hw_breakpoint_disable();
 
        if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
index 5284cd2b57769f53e79f520ecc6f8199720497cf..744508e7cfdd051e3896fe5ec28d5d3da0f3c16c 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/clockchips.h>
 #include <linux/random.h>
 #include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -17,6 +18,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@ void flush_thread(void)
        }
 #endif
 
-       clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
-       tsk->thread.debugreg0 = 0;
-       tsk->thread.debugreg1 = 0;
-       tsk->thread.debugreg2 = 0;
-       tsk->thread.debugreg3 = 0;
-       tsk->thread.debugreg6 = 0;
-       tsk->thread.debugreg7 = 0;
+       flush_ptrace_hw_breakpoint(tsk);
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
        else if (next->debugctlmsr != prev->debugctlmsr)
                update_debugctlmsr(next->debugctlmsr);
 
-       if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-               set_debugreg(next->debugreg0, 0);
-               set_debugreg(next->debugreg1, 1);
-               set_debugreg(next->debugreg2, 2);
-               set_debugreg(next->debugreg3, 3);
-               /* no 4 and 5 */
-               set_debugreg(next->debugreg6, 6);
-               set_debugreg(next->debugreg7, 7);
-       }
-
        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
index 4cf79567cdab0728b33c2f9698e3a5b535e4eb28..d5bd3132ee706d764510eec5058cf3e214cb2cc2 100644 (file)
@@ -58,6 +58,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
        task_user_gs(p) = get_user_gs(regs);
 
+       p->thread.io_bitmap_ptr = NULL;
        tsk = current;
+       err = -ENOMEM;
+
+       memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
        if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
                                                IO_BITMAP_BYTES, GFP_KERNEL);
index eb62cbcaa490ad553ef5d70b6751a2288d25089e..70cf15873f3d65da38e42fbbb670555d2fc22722 100644 (file)
@@ -52,6 +52,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;
+       p->thread.io_bitmap_ptr = NULL;
 
        savesegment(gs, p->thread.gsindex);
        savesegment(fs, p->thread.fsindex);
        savesegment(es, p->thread.es);
        savesegment(ds, p->thread.ds);
 
+       err = -ENOMEM;
+       memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +346,7 @@ out:
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
+
        return err;
 }
 
@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         */
        if (preload_fpu)
                __math_state_restore();
+
        return prev_p;
 }
 
index c4f76d275ee4cda38bed14268a4ac3b6f04d7436..04d182a7cfdbd6e3c88c41658f6d0a91878e6366 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -34,6 +36,7 @@
 #include <asm/prctl.h>
 #include <asm/proto.h>
 #include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
 
 #include "tls.h"
 
@@ -249,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
        return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-       return TASK_SIZE - 3;
-}
-
 #else  /* CONFIG_X86_64 */
 
 #define FLAG_MASK              (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -378,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
        return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
-       if (test_tsk_thread_flag(task, TIF_IA32))
-               return IA32_PAGE_OFFSET - 3;
-#endif
-       return TASK_SIZE_MAX - 7;
-}
-
 #endif /* CONFIG_X86_32 */
 
 static unsigned long get_flags(struct task_struct *task)
@@ -566,98 +555,238 @@ static int genregs_set(struct task_struct *target,
        return ret;
 }
 
+static void ptrace_triggered(struct perf_event *bp, void *data)
+{
+       int i;
+       struct thread_struct *thread = &(current->thread);
+
+       /*
+        * Record the hit in the virtual DR6 so the thread's debugger sees
+        * it. A bp not found in ptrace_bps[] must not set any DR6 bit.
+        */
+       for (i = 0; i < HBP_NUM; i++) {
+               if (thread->ptrace_bps[i] == bp) {
+                       thread->debugreg6 |= (DR_TRAP0 << i);
+                       break;
+               }
+       }
+}
+
 /*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Walk through every ptrace breakpoint of this thread and
+ * build the dr7 value from their attributes.
+ *
  */
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 {
-       switch (n) {
-       case 0:         return child->thread.debugreg0;
-       case 1:         return child->thread.debugreg1;
-       case 2:         return child->thread.debugreg2;
-       case 3:         return child->thread.debugreg3;
-       case 6:         return child->thread.debugreg6;
-       case 7:         return child->thread.debugreg7;
+       int i;
+       unsigned long dr7 = 0;  /* not int: bit 31 would sign-extend on return */
+       struct arch_hw_breakpoint *info;
+
+       for (i = 0; i < HBP_NUM; i++) {
+               if (bp[i] && !bp[i]->attr.disabled) {
+                       info = counter_arch_bp(bp[i]);
+                       dr7 |= encode_dr7(i, info->len, info->type);
+               }
        }
-       return 0;
+
+       return dr7;
 }
 
-static int ptrace_set_debugreg(struct task_struct *child,
-                              int n, unsigned long data)
+static struct perf_event *
+ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+                        struct task_struct *tsk, int disabled)
 {
-       int i;
+       int err;
+       int gen_len, gen_type;
+       DEFINE_BREAKPOINT_ATTR(attr);
 
-       if (unlikely(n == 4 || n == 5))
-               return -EIO;
+       /*
+        * We should have at least an inactive breakpoint at this
+        * slot. It means the user is writing dr7 without having
+        * written the address register first
+        */
+       if (!bp)
+               return ERR_PTR(-EINVAL);
 
-       if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
-               return -EIO;
+       err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+       if (err)
+               return ERR_PTR(err);
 
-       switch (n) {
-       case 0:         child->thread.debugreg0 = data; break;
-       case 1:         child->thread.debugreg1 = data; break;
-       case 2:         child->thread.debugreg2 = data; break;
-       case 3:         child->thread.debugreg3 = data; break;
+       attr = bp->attr;
+       attr.bp_len = gen_len;
+       attr.bp_type = gen_type;
+       attr.disabled = disabled;
 
-       case 6:
-               if ((data & ~0xffffffffUL) != 0)
-                       return -EIO;
-               child->thread.debugreg6 = data;
-               break;
+       return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+}
+
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
+{
+       struct thread_struct *thread = &(tsk->thread);
+       unsigned long old_dr7;
+       int i, orig_ret = 0, rc = 0;
+       int enabled, second_pass = 0;
+       unsigned len, type;
+       struct perf_event *bp;
+
+       data &= ~DR_CONTROL_RESERVED;
+       old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+restore:
+       /*
+        * Loop through all the hardware breakpoints, making the
+        * appropriate changes to each.
+        */
+       for (i = 0; i < HBP_NUM; i++) {
+               enabled = decode_dr7(data, i, &len, &type);
+               bp = thread->ptrace_bps[i];
+
+               if (!enabled) {
+                       if (bp) {
+                               /*
+                                * Don't unregister the breakpoints right-away,
+                                * unless all register_user_hw_breakpoint()
+                                * requests have succeeded. This prevents
+                                * any window of opportunity for debug
+                                * register grabbing by other users.
+                                */
+                               if (!second_pass)
+                                       continue;
+
+                               thread->ptrace_bps[i] = NULL;
+                               bp = ptrace_modify_breakpoint(bp, len, type,
+                                                             tsk, 1);
+                               if (IS_ERR(bp)) {
+                                       rc = PTR_ERR(bp);
+                                       thread->ptrace_bps[i] = NULL;
+                                       break;
+                               }
+                               thread->ptrace_bps[i] = bp;
+                       }
+                       continue;
+               }
+
+               bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
 
-       case 7:
+               /* Incorrect bp, or we have a bug in bp API */
+               if (IS_ERR(bp)) {
+                       rc = PTR_ERR(bp);
+                       thread->ptrace_bps[i] = NULL;
+                       break;
+               }
+               thread->ptrace_bps[i] = bp;
+       }
+       /*
+        * Make a second pass to free the remaining unused breakpoints
+        * or to restore the original breakpoints if an error occurred.
+        */
+       if (!second_pass) {
+               second_pass = 1;
+               if (rc < 0) {
+                       orig_ret = rc;
+                       data = old_dr7;
+               }
+               goto restore;
+       }
+       return ((orig_ret < 0) ? orig_ret : rc);
+}
+
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ *
+ * Returns the value reported for debug register @n of @tsk:
+ *  - n < HBP_NUM: the address recorded in the ptrace breakpoint occupying
+ *    that slot, or 0 when the slot is empty;
+ *  - n == 6: the DR6 value stored in the thread struct;
+ *  - n == 7: whatever ptrace_get_dr7() reports for the thread's ptrace
+ *    breakpoints;
+ *  - anything else: 0.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+       struct thread_struct *thread = &(tsk->thread);
+       struct perf_event *bp;
+
+       if (n < HBP_NUM) {
+               bp = thread->ptrace_bps[n];
+               if (!bp)
+                       return 0;
+               return bp->hw.info.address;
+       }
+
+       if (n == 6)
+               return thread->debugreg6;
+
+       if (n == 7)
+               return ptrace_get_dr7(thread->ptrace_bps);
+
+       return 0;
+}
+
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+                                     unsigned long addr)
+{
+       struct perf_event *bp;
+       struct thread_struct *t = &tsk->thread;
+       DEFINE_BREAKPOINT_ATTR(attr);
+
+       if (!t->ptrace_bps[nr]) {
                /*
-                * Sanity-check data. Take one half-byte at once with
-                * check = (val >> (16 + 4*i)) & 0xf. It contains the
-                * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
-                * 2 and 3 are LENi. Given a list of invalid values,
-                * we do mask |= 1 << invalid_value, so that
-                * (mask >> check) & 1 is a correct test for invalid
-                * values.
-                *
-                * R/Wi contains the type of the breakpoint /
-                * watchpoint, LENi contains the length of the watched
-                * data in the watchpoint case.
-                *
-                * The invalid values are:
-                * - LENi == 0x10 (undefined), so mask |= 0x0f00.       [32-bit]
-                * - R/Wi == 0x10 (break on I/O reads or writes), so
-                *   mask |= 0x4444.
-                * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
-                *   0x1110.
-                *
-                * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
-                *
-                * See the Intel Manual "System Programming Guide",
-                * 15.2.4
-                *
-                * Note that LENi == 0x10 is defined on x86_64 in long
-                * mode (i.e. even for 32-bit userspace software, but
-                * 64-bit kernel), so the x86_64 mask value is 0x5454.
-                * See the AMD manual no. 24593 (AMD64 System Programming)
+                * Put stub len and type to register (reserve) an inactive but
+                * correct bp
+                *
+                * Slot nr has no breakpoint yet: a new one is registered for
+                * tsk at addr with a 1-byte write stub and attr.disabled = 1,
+                * using ptrace_triggered as its callback.
                 */
-#ifdef CONFIG_X86_32
-#define        DR7_MASK        0x5f54
-#else
-#define        DR7_MASK        0x5554
-#endif
-               data &= ~DR_CONTROL_RESERVED;
-               for (i = 0; i < 4; i++)
-                       if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
-                               return -EIO;
-               child->thread.debugreg7 = data;
-               if (data)
-                       set_tsk_thread_flag(child, TIF_DEBUG);
-               else
-                       clear_tsk_thread_flag(child, TIF_DEBUG);
-               break;
+               attr.bp_addr = addr;
+               attr.bp_len = HW_BREAKPOINT_LEN_1;
+               attr.bp_type = HW_BREAKPOINT_W;
+               attr.disabled = 1;
+
+               bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+       } else {
+               bp = t->ptrace_bps[nr];
+               t->ptrace_bps[nr] = NULL;
+
+               attr = bp->attr;
+               attr.bp_addr = addr;
+               bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
        }
+       /*
+        * CHECKME: the previous code returned -EIO if the addr wasn't a
+        * valid task virtual addr. The new one will return -EINVAL in this
+        * case.
+        * -EINVAL may be what we want for in-kernel breakpoints users, but
+        * -EIO looks better for ptrace, since we refuse a register writing
+        * for the user. And anyway this is the previous behaviour.
+        *
+        * On error the code from the register/modify call is returned
+        * unchanged. In the modify path the slot was already cleared before
+        * the call, so t->ptrace_bps[nr] stays NULL after a failure.
+        * NOTE(review): whether the old breakpoint is still alive at that
+        * point depends on modify_user_hw_breakpoint() -- confirm.
+        *
+        * On success the (possibly new) breakpoint is stored in the slot and
+        * 0 is returned.
+        */
+       if (IS_ERR(bp))
+               return PTR_ERR(bp);
+
+       t->ptrace_bps[nr] = bp;
 
        return 0;
 }
 
+/*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ *
+ * Writes @val to debug register @n of @tsk. Returns 0 on success, -EIO for
+ * the non-existent DR4/DR5, or the error reported by the helper that
+ * handles the address registers or DR7.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+       struct thread_struct *thread = &(tsk->thread);
+
+       /* There are no DR4 or DR5 registers */
+       if (n == 4 || n == 5)
+               return -EIO;
+
+       /* DR6 is only a stored value: record it and report success */
+       if (n == 6) {
+               thread->debugreg6 = val;
+               return 0;
+       }
+
+       /* Address registers are backed by ptrace breakpoints */
+       if (n < HBP_NUM) {
+               int err = ptrace_set_breakpoint_addr(tsk, n, val);
+
+               if (err)
+                       return err;
+       }
+
+       /* All that's left is DR7 */
+       if (n == 7)
+               return ptrace_write_dr7(tsk, val);
+
+       return 0;
+}
+
 /*
  * These access the current or another (stopped) task's io permission
  * bitmap for debugging or core dump.
index 2a34f9c5be214fd428c4197a1f09819d65759a37..c0ca8f921c91f24091cb89af9fad7360c6312313 100644 (file)
 #ifdef CONFIG_X86_64
 #include <asm/numa_64.h>
 #endif
+#include <asm/mce.h>
 
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
        x86_init.oem.banner();
+
+       mcheck_init();
 }
 
 #ifdef CONFIG_X86_32
index 6a44a76055adcb781572978d1e332a9923f7e2d6..fbf3b07c856740805f0dbe64a85fa0d2131df37e 100644 (file)
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
 
        signr = get_signal_to_deliver(&info, &ka, regs, NULL);
        if (signr > 0) {
-               /*
-                * Re-enable any watchpoints before delivering the
-                * signal to user space. The processor register will
-                * have been cleared if the watchpoint triggered
-                * inside the kernel.
-                */
-               if (current->thread.debugreg7)
-                       set_debugreg(current->thread.debugreg7, 7);
-
                /* Whee! Actually deliver the signal.  */
                if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
                        /*
index 7e37dcee0cc352df1104e211d77cce2f99a9e4e8..33399176512a8a2c4c718d53ad76bdea631bd46e 100644 (file)
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
+/*
+ * Handle a debug trap: read and reset DR6, store it as the task's
+ * virtualized DR6, let the DIE_DEBUG notifiers consume the event, then send
+ * SIGTRAP if a step or breakpoint bit is still set in the stored value.
+ *
+ * Every return that follows preempt_conditional_sti() has to be balanced by
+ * preempt_conditional_cli(), the vm86 path included.
+ */
 dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 {
        struct task_struct *tsk = current;
-       unsigned long condition;
+       unsigned long dr6;
        int si_code;
 
-       get_debugreg(condition, 6);
+       get_debugreg(dr6, 6);
 
        /* Catch kmemcheck conditions first of all! */
-       if (condition & DR_STEP && kmemcheck_trap(regs))
+       if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
                return;
 
+       /* DR6 may or may not be cleared by the CPU */
+       set_debugreg(0, 6);
        /*
         * The processor cleared BTF, so don't mark that we need it set.
         */
        clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
        tsk->thread.debugctlmsr = 0;
 
-       if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
-                                               SIGTRAP) == NOTIFY_STOP)
+       /* Store the virtualized DR6 value */
+       tsk->thread.debugreg6 = dr6;
+
+       /*
+        * The address of the local dr6 is passed through the 'err' argument
+        * (hence PTR_ERR) so that a notifier can clear the bits it handled
+        * in place; the DR_STEP test below then sees the updated value.
+        */
+       if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+                                                       SIGTRAP) == NOTIFY_STOP)
                return;
 
        /* It's safe to allow irq's after DR6 has been saved */
        preempt_conditional_sti(regs);
 
-       /* Mask out spurious debug traps due to lazy DR7 setting */
-       if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-               if (!tsk->thread.debugreg7)
-                       goto clear_dr7;
+       if (regs->flags & X86_VM_MASK) {
+               handle_vm86_trap((struct kernel_vm86_regs *) regs,
+                               error_code, 1);
+               /* Balance preempt_conditional_sti() above before leaving */
+               preempt_conditional_cli(regs);
+               return;
        }
 
-#ifdef CONFIG_X86_32
-       if (regs->flags & X86_VM_MASK)
-               goto debug_vm86;
-#endif
-
-       /* Save debug status register where ptrace can see it */
-       tsk->thread.debugreg6 = condition;
-
        /*
-        * Single-stepping through TF: make sure we ignore any events in
-        * kernel space (but re-enable TF when returning to user mode).
+        * Single-stepping through system calls: ignore any exceptions in
+        * kernel space, but re-enable TF when returning to user mode.
+        *
+        * We already checked v86 mode above, so we can check for kernel mode
+        * by just checking the CPL of CS.
         */
-       if (condition & DR_STEP) {
-               if (!user_mode(regs))
-                       goto clear_TF_reenable;
+       if ((dr6 & DR_STEP) && !user_mode(regs)) {
+               tsk->thread.debugreg6 &= ~DR_STEP;
+               set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+               regs->flags &= ~X86_EFLAGS_TF;
        }
-
-       si_code = get_si_code(condition);
-       /* Ok, finally something we can handle */
-       send_sigtrap(tsk, regs, error_code, si_code);
-
-       /*
-        * Disable additional traps. They'll be re-enabled when
-        * the signal is delivered.
-        */
-clear_dr7:
-       set_debugreg(0, 7);
+       si_code = get_si_code(tsk->thread.debugreg6);
+       if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+               send_sigtrap(tsk, regs, error_code, si_code);
        preempt_conditional_cli(regs);
-       return;
 
-#ifdef CONFIG_X86_32
-debug_vm86:
-       /* reenable preemption: handle_vm86_trap() might sleep */
-       dec_preempt_count();
-       handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-       conditional_cli(regs);
-       return;
-#endif
-
-clear_TF_reenable:
-       set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-       regs->flags &= ~X86_EFLAGS_TF;
-       preempt_conditional_cli(regs);
        return;
 }
 
index ae07d261527cba458ed1682118b19295bc997847..4fc80174191ce4b17549b643fe11dee645286c3f 100644 (file)
@@ -42,6 +42,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#include <asm/debugreg.h>
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        trace_kvm_entry(vcpu->vcpu_id);
        kvm_x86_ops->run(vcpu, kvm_run);
 
-       if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
-               set_debugreg(current->thread.debugreg0, 0);
-               set_debugreg(current->thread.debugreg1, 1);
-               set_debugreg(current->thread.debugreg2, 2);
-               set_debugreg(current->thread.debugreg3, 3);
-               set_debugreg(current->thread.debugreg6, 6);
-               set_debugreg(current->thread.debugreg7, 7);
-       }
+       /*
+        * If the guest has used debug registers, at least dr7
+        * will be disabled while returning to the host.
+        * If we don't have active breakpoints in the host, we don't
+        * care about the messed up debug address registers. But if
+        * we have some of them active, restore the old state.
+        */
+       if (hw_breakpoint_active())
+               hw_breakpoint_restore();
 
        set_bit(KVM_REQ_KICK, &vcpu->requests);
        local_irq_enable();
index 16ccbd77917f22c1693b9b41fcb8dc7485acee39..11a4ad4d62530ff58b7bfa56d086ad69e3fc0bd1 100644 (file)
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
        struct die_args *arg = args;
 
        if (val == DIE_DEBUG && (arg->err & DR_STEP))
-               if (post_kmmio_handler(arg->err, arg->regs) == 1)
+               if (post_kmmio_handler(arg->err, arg->regs) == 1) {
+                       /*
+                        * Reset the BS bit in dr6 (pointed by args->err) to
+                        * denote completion of processing
+                        */
+                       (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
                        return NOTIFY_STOP;
+               }
 
        return NOTIFY_DONE;
 }
index 8aa85f17667e5034cfd2fae3eecd217c878d0529..0a979f3e5b8a7596aaf7d402cf73b0bb7eace8a9 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/mce.h>
 #include <asm/xcr.h>
 #include <asm/suspend.h>
+#include <asm/debugreg.h>
 
 #ifdef CONFIG_X86_32
 static struct saved_context saved_context;
@@ -142,31 +143,6 @@ static void fix_processor_context(void)
 #endif
        load_TR_desc();                         /* This does ltr */
        load_LDT(&current->active_mm->context); /* This does lldt */
-
-       /*
-        * Now maybe reload the debug registers
-        */
-       if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
-               set_debugreg(current->thread.debugreg0, 0);
-               set_debugreg(current->thread.debugreg1, 1);
-               set_debugreg(current->thread.debugreg2, 2);
-               set_debugreg(current->thread.debugreg3, 3);
-               /* no 4 and 5 */
-               set_debugreg(current->thread.debugreg6, 6);
-               set_debugreg(current->thread.debugreg7, 7);
-#else
-               /* CONFIG_X86_64 */
-               loaddebug(&current->thread, 0);
-               loaddebug(&current->thread, 1);
-               loaddebug(&current->thread, 2);
-               loaddebug(&current->thread, 3);
-               /* no 4 and 5 */
-               loaddebug(&current->thread, 6);
-               loaddebug(&current->thread, 7);
-#endif
-       }
-
 }
 
 /**
index af75e07217ba5616f8cebed4951624bc6787594d..d8214dc03fa7a46e4dc2c9408b8674a13017e1e0 100644 (file)
@@ -114,6 +114,7 @@ int main(int argc, char **argv)
        unsigned char insn_buf[16];
        struct insn insn;
        int insns = 0, c;
+       int warnings = 0;
 
        parse_args(argc, argv);
 
@@ -151,18 +152,22 @@ int main(int argc, char **argv)
                insn_init(&insn, insn_buf, x86_64);
                insn_get_length(&insn);
                if (insn.length != nb) {
-                       fprintf(stderr, "Error: %s found a difference at %s\n",
+                       warnings++;
+                       fprintf(stderr, "Warning: %s found difference at %s\n",
                                prog, sym);
-                       fprintf(stderr, "Error: %s", line);
-                       fprintf(stderr, "Error: objdump says %d bytes, but "
+                       fprintf(stderr, "Warning: %s", line);
+                       fprintf(stderr, "Warning: objdump says %d bytes, but "
                                "insn_get_length() says %d\n", nb,
                                insn.length);
                        if (verbose)
                                dump_insn(stderr, &insn);
-                       exit(2);
                }
        }
-       fprintf(stderr, "Succeed: decoded and checked %d instructions\n",
-               insns);
+       if (warnings)
+               fprintf(stderr, "Warning: decoded and checked %d"
+                       " instructions with %d warnings\n", insns, warnings);
+       else
+               fprintf(stderr, "Succeed: decoded and checked %d"
+                       " instructions\n", insns);
        return 0;
 }
index 713ed7d372475dc325ac7cfca8cc5e22181af29b..689cc6a6214df3b2f9aecb04aedabeaa2779955d 100644 (file)
@@ -3,7 +3,6 @@
 
 static bool report_gart_errors;
 static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
-static void (*orig_mce_callback)(struct mce *m);
 
 void amd_report_gart_errors(bool v)
 {
@@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec)
                pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
 }
 
-static void amd_decode_mce(struct mce *m)
+static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
+                          void *data)
 {
+       struct mce *m = (struct mce *)data;
        struct err_regs regs;
        int node, ecc;
 
@@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m)
        }
 
        amd_decode_err_code(m->status & 0xffff);
+
+       return NOTIFY_STOP;
 }
 
+static struct notifier_block amd_mce_dec_nb = {
+       .notifier_call  = amd_decode_mce,
+};
+
 static int __init mce_amd_init(void)
 {
        /*
         * We can decode MCEs for Opteron and later CPUs:
+        * on an AMD boot CPU of family 0xf or above, amd_mce_dec_nb (whose
+        * notifier_call is amd_decode_mce) is registered on
+        * x86_mce_decoder_chain. The function returns 0 in every case.
         */
        if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
-           (boot_cpu_data.x86 >= 0xf)) {
-               /* safe the default decode mce callback */
-               orig_mce_callback = x86_mce_decode_callback;
-
-               x86_mce_decode_callback = amd_decode_mce;
-       }
+           (boot_cpu_data.x86 >= 0xf))
+               atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
 
        return 0;
 }
@@ -442,7 +445,7 @@ early_initcall(mce_amd_init);
 #ifdef MODULE
 static void __exit mce_amd_exit(void)
 {
-       x86_mce_decode_callback = orig_mce_callback;
+       atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
 }
 
 MODULE_DESCRIPTION("AMD MCE decoder");
index 43360c1d8f70a1d9bf7dc436bb1b82ee497eda6f..47bbdf9c38d0428328e48d9c772cdc3086375e5c 100644 (file)
@@ -137,13 +137,8 @@ struct ftrace_event_call {
 
 #define FTRACE_MAX_PROFILE_SIZE        2048
 
-struct perf_trace_buf {
-       char    buf[FTRACE_MAX_PROFILE_SIZE];
-       int     recursion;
-};
-
-extern struct perf_trace_buf   *perf_trace_buf;
-extern struct perf_trace_buf   *perf_trace_buf_nmi;
+extern char *perf_trace_buf;
+extern char *perf_trace_buf_nmi;
 
 #define MAX_FILTER_PRED                32
 #define MAX_FILTER_STR_VAL     256     /* Should handle KSYM_SYMBOL_LEN */
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
new file mode 100644 (file)
index 0000000..a03daed
--- /dev/null
@@ -0,0 +1,131 @@
+#ifndef _LINUX_HW_BREAKPOINT_H
+#define _LINUX_HW_BREAKPOINT_H
+
+enum {
+       HW_BREAKPOINT_LEN_1 = 1,
+       HW_BREAKPOINT_LEN_2 = 2,
+       HW_BREAKPOINT_LEN_4 = 4,
+       HW_BREAKPOINT_LEN_8 = 8,
+};
+
+enum {
+       HW_BREAKPOINT_R = 1,
+       HW_BREAKPOINT_W = 2,
+       HW_BREAKPOINT_X = 4,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/perf_event.h>
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+/*
+ * Define a struct perf_event_attr variable called @name, initialised as a
+ * breakpoint event: .type is PERF_TYPE_BREAKPOINT, .size is the size of the
+ * variable itself and .pinned is set. Fields not listed in the initialiser
+ * start out as zero.
+ *
+ * As it's for in-kernel or ptrace use, we want it to be pinned.
+ *
+ * NOTE: the expansion already ends with ';', so writing
+ * "DEFINE_BREAKPOINT_ATTR(x);" leaves an extra empty statement behind.
+ */
+#define DEFINE_BREAKPOINT_ATTR(name)   \
+struct perf_event_attr name = {                \
+       .type = PERF_TYPE_BREAKPOINT,   \
+       .size = sizeof(name),           \
+       .pinned = 1,                    \
+};
+
+/*
+ * Initialise @attr as a pinned breakpoint event description.
+ *
+ * The whole structure is overwritten: .type, .size and .pinned get the same
+ * values DEFINE_BREAKPOINT_ATTR() uses and every other field is reset to
+ * zero, so an attr living on the stack carries no stale bytes into the
+ * registration calls. Set bp_addr, bp_len, bp_type, ... after this call.
+ */
+static inline void hw_breakpoint_init(struct perf_event_attr *attr)
+{
+       *attr = (struct perf_event_attr) {
+               .type   = PERF_TYPE_BREAKPOINT,
+               .size   = sizeof(*attr),
+               .pinned = 1,
+       };
+}
+
+/*
+ * Return the bp_addr stored in the attr of @bp, converted to unsigned long.
+ */
+static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
+{
+       const struct perf_event_attr *attr = &bp->attr;
+
+       return attr->bp_addr;
+}
+
+/*
+ * Return the bp_type stored in the attr of @bp.
+ */
+static inline int hw_breakpoint_type(struct perf_event *bp)
+{
+       const struct perf_event_attr *attr = &bp->attr;
+
+       return attr->bp_type;
+}
+
+/*
+ * Return the bp_len stored in the attr of @bp.
+ */
+static inline int hw_breakpoint_len(struct perf_event *bp)
+{
+       const struct perf_event_attr *attr = &bp->attr;
+
+       return attr->bp_len;
+}
+
+extern struct perf_event *
+register_user_hw_breakpoint(struct perf_event_attr *attr,
+                           perf_callback_t triggered,
+                           struct task_struct *tsk);
+
+/* FIXME: only change from the attr, and don't unregister */
+extern struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+                         struct perf_event_attr *attr,
+                         perf_callback_t triggered,
+                         struct task_struct *tsk);
+
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+extern struct perf_event *
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
+                               perf_callback_t triggered,
+                               int cpu);
+
+extern struct perf_event **
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+                           perf_callback_t triggered);
+
+extern int register_perf_hw_breakpoint(struct perf_event *bp);
+extern int __register_perf_hw_breakpoint(struct perf_event *bp);
+extern void unregister_hw_breakpoint(struct perf_event *bp);
+extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
+
+extern int reserve_bp_slot(struct perf_event *bp);
+extern void release_bp_slot(struct perf_event *bp);
+
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+/*
+ * Return a pointer to the arch_hw_breakpoint embedded in the hw part of @bp.
+ */
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+       struct hw_perf_event *hwc = &bp->hw;
+
+       return &hwc->info;
+}
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+
+static inline struct perf_event *
+register_user_hw_breakpoint(struct perf_event_attr *attr,
+                           perf_callback_t triggered,
+                           struct task_struct *tsk)    { return NULL; }
+static inline struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+                         struct perf_event_attr *attr,
+                         perf_callback_t triggered,
+                         struct task_struct *tsk)      { return NULL; }
+static inline struct perf_event *
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
+                               perf_callback_t triggered,
+                               int cpu)                { return NULL; }
+static inline struct perf_event **
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+                           perf_callback_t triggered)  { return NULL; }
+static inline int
+register_perf_hw_breakpoint(struct perf_event *bp)     { return -ENOSYS; }
+static inline int
+__register_perf_hw_breakpoint(struct perf_event *bp)   { return -ENOSYS; }
+static inline void unregister_hw_breakpoint(struct perf_event *bp)     { }
+static inline void
+unregister_wide_hw_breakpoint(struct perf_event **cpu_events)          { }
+static inline int
+reserve_bp_slot(struct perf_event *bp)                 {return -ENOSYS; }
+static inline void release_bp_slot(struct perf_event *bp)              { }
+
+static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { }
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+       return NULL;
+}
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_HW_BREAKPOINT_H */
index 7f87563c8485a62da6aafe2e646bd5eb6bdb73ab..43adbd7f0010c21719f75dc05dfbf5d61465abf7 100644 (file)
 #include <linux/ioctl.h>
 #include <asm/byteorder.h>
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <asm/hw_breakpoint.h>
+#endif
+
 /*
  * User-space ABI bits:
  */
@@ -31,6 +35,7 @@ enum perf_type_id {
        PERF_TYPE_TRACEPOINT                    = 2,
        PERF_TYPE_HW_CACHE                      = 3,
        PERF_TYPE_RAW                           = 4,
+       PERF_TYPE_BREAKPOINT                    = 5,
 
        PERF_TYPE_MAX,                          /* non-ABI */
 };
@@ -209,6 +214,15 @@ struct perf_event_attr {
                __u32           wakeup_events;    /* wakeup every n events */
                __u32           wakeup_watermark; /* bytes before wakeup   */
        };
+
+       union {
+               struct { /* Hardware breakpoint info */
+                       __u64           bp_addr;
+                       __u32           bp_type;
+                       __u32           bp_len;
+               };
+       };
+
        __u32                   __reserved_2;
 
        __u64                   __reserved_3;
@@ -478,6 +492,11 @@ struct hw_perf_event {
                        s64             remaining;
                        struct hrtimer  hrtimer;
                };
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+               union { /* breakpoint */
+                       struct arch_hw_breakpoint       info;
+               };
+#endif
        };
        atomic64_t                      prev_count;
        u64                             sample_period;
@@ -546,6 +565,10 @@ struct perf_pending_entry {
        void (*func)(struct perf_pending_entry *);
 };
 
+typedef void (*perf_callback_t)(struct perf_event *, void *);
+
+struct perf_sample_data;
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -588,7 +611,7 @@ struct perf_event {
        u64                             tstamp_running;
        u64                             tstamp_stopped;
 
-       struct perf_event_attr  attr;
+       struct perf_event_attr          attr;
        struct hw_perf_event            hw;
 
        struct perf_event_context       *ctx;
@@ -637,10 +660,18 @@ struct perf_event {
        struct pid_namespace            *ns;
        u64                             id;
 
+       void (*overflow_handler)(struct perf_event *event,
+                       int nmi, struct perf_sample_data *data,
+                       struct pt_regs *regs);
+
 #ifdef CONFIG_EVENT_PROFILE
        struct event_filter             *filter;
 #endif
 
+       perf_callback_t                 callback;
+
+       perf_callback_t                 event_callback;
+
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -745,6 +776,14 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader,
               struct perf_cpu_context *cpuctx,
               struct perf_event_context *ctx, int cpu);
 extern void perf_event_update_userpage(struct perf_event *event);
+extern int perf_event_release_kernel(struct perf_event *event);
+extern struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr,
+                               int cpu,
+                               pid_t pid,
+                               perf_callback_t callback);
+extern u64 perf_event_read_value(struct perf_event *event,
+                                u64 *enabled, u64 *running);
 
 struct perf_sample_data {
        u64                             type;
@@ -821,6 +860,7 @@ extern int sysctl_perf_event_sample_rate;
 extern void perf_event_init(void);
 extern void perf_tp_event(int event_id, u64 addr, u64 count,
                                 void *record, int entry_size);
+extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
 #define perf_misc_flags(regs)  (user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -834,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle,
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
                             const void *buf, unsigned int len);
+extern int perf_swevent_get_recursion_context(void);
+extern void perf_swevent_put_recursion_context(int rctx);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)            { }
@@ -855,11 +897,15 @@ static inline int perf_event_task_enable(void)                            { return -EINVAL; }
 static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
                     struct pt_regs *regs, u64 addr)                    { }
+static inline void
+perf_bp_event(struct perf_event *event, void *data)            { }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)         { }
 static inline void perf_event_comm(struct task_struct *tsk)            { }
 static inline void perf_event_fork(struct task_struct *tsk)            { }
 static inline void perf_event_init(void)                               { }
+static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
+static inline void perf_swevent_put_recursion_context(int rctx)                { }
 
 #endif
 
index b50974a93af0b83d2c1e3a826f4bdc6a8848ba62..e79e2f3ccc516e73b42f32c32ddb772c5cb1cc2c 100644 (file)
@@ -99,37 +99,16 @@ struct perf_event_attr;
 #define __SC_TEST6(t6, a6, ...)        __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
 #ifdef CONFIG_EVENT_PROFILE
-#define TRACE_SYS_ENTER_PROFILE(sname)                                        \
-static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused)      \
-{                                                                             \
-       return reg_prof_syscall_enter("sys"#sname);                            \
-}                                                                             \
-                                                                              \
-static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused)    \
-{                                                                             \
-       unreg_prof_syscall_enter("sys"#sname);                                 \
-}
-
-#define TRACE_SYS_EXIT_PROFILE(sname)                                         \
-static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused)       \
-{                                                                             \
-       return reg_prof_syscall_exit("sys"#sname);                             \
-}                                                                             \
-                                                                              \
-static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
-{                                                                              \
-       unreg_prof_syscall_exit("sys"#sname);                                  \
-}
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)                                   \
        .profile_count = ATOMIC_INIT(-1),                                      \
-       .profile_enable = prof_sysenter_enable_##sname,                        \
-       .profile_disable = prof_sysenter_disable_##sname,
+       .profile_enable = prof_sysenter_enable,                                \
+       .profile_disable = prof_sysenter_disable,
 
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)                                    \
        .profile_count = ATOMIC_INIT(-1),                                      \
-       .profile_enable = prof_sysexit_enable_##sname,                         \
-       .profile_disable = prof_sysexit_disable_##sname,
+       .profile_enable = prof_sysexit_enable,                                 \
+       .profile_disable = prof_sysexit_disable,
 #else
 #define TRACE_SYS_ENTER_PROFILE(sname)
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)
@@ -153,74 +132,46 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 #define __SC_STR_TDECL6(t, a, ...)     #t, __SC_STR_TDECL5(__VA_ARGS__)
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)                               \
+       static const struct syscall_metadata __syscall_meta_##sname;    \
        static struct ftrace_event_call event_enter_##sname;            \
-       struct trace_event enter_syscall_print_##sname = {              \
+       static struct trace_event enter_syscall_print_##sname = {       \
                .trace                  = print_syscall_enter,          \
        };                                                              \
-       static int init_enter_##sname(struct ftrace_event_call *call)   \
-       {                                                               \
-               int num, id;                                            \
-               num = syscall_name_to_nr("sys"#sname);                  \
-               if (num < 0)                                            \
-                       return -ENOSYS;                                 \
-               id = register_ftrace_event(&enter_syscall_print_##sname);\
-               if (!id)                                                \
-                       return -ENODEV;                                 \
-               event_enter_##sname.id = id;                            \
-               set_syscall_enter_id(num, id);                          \
-               INIT_LIST_HEAD(&event_enter_##sname.fields);            \
-               return 0;                                               \
-       }                                                               \
-       TRACE_SYS_ENTER_PROFILE(sname);                                 \
        static struct ftrace_event_call __used                          \
          __attribute__((__aligned__(4)))                               \
          __attribute__((section("_ftrace_events")))                    \
          event_enter_##sname = {                                       \
                .name                   = "sys_enter"#sname,            \
                .system                 = "syscalls",                   \
-               .event                  = &event_syscall_enter,         \
-               .raw_init               = init_enter_##sname,           \
+               .event                  = &enter_syscall_print_##sname, \
+               .raw_init               = init_syscall_trace,           \
                .show_format            = syscall_enter_format,         \
                .define_fields          = syscall_enter_define_fields,  \
                .regfunc                = reg_event_syscall_enter,      \
                .unregfunc              = unreg_event_syscall_enter,    \
-               .data                   = "sys"#sname,                  \
+               .data                   = (void *)&__syscall_meta_##sname,\
                TRACE_SYS_ENTER_PROFILE_INIT(sname)                     \
        }
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)                                        \
+       static const struct syscall_metadata __syscall_meta_##sname;    \
        static struct ftrace_event_call event_exit_##sname;             \
-       struct trace_event exit_syscall_print_##sname = {               \
+       static struct trace_event exit_syscall_print_##sname = {        \
                .trace                  = print_syscall_exit,           \
        };                                                              \
-       static int init_exit_##sname(struct ftrace_event_call *call)    \
-       {                                                               \
-               int num, id;                                            \
-               num = syscall_name_to_nr("sys"#sname);                  \
-               if (num < 0)                                            \
-                       return -ENOSYS;                                 \
-               id = register_ftrace_event(&exit_syscall_print_##sname);\
-               if (!id)                                                \
-                       return -ENODEV;                                 \
-               event_exit_##sname.id = id;                             \
-               set_syscall_exit_id(num, id);                           \
-               INIT_LIST_HEAD(&event_exit_##sname.fields);             \
-               return 0;                                               \
-       }                                                               \
-       TRACE_SYS_EXIT_PROFILE(sname);                                  \
        static struct ftrace_event_call __used                          \
          __attribute__((__aligned__(4)))                               \
          __attribute__((section("_ftrace_events")))                    \
          event_exit_##sname = {                                        \
                .name                   = "sys_exit"#sname,             \
                .system                 = "syscalls",                   \
-               .event                  = &event_syscall_exit,          \
-               .raw_init               = init_exit_##sname,            \
+               .event                  = &exit_syscall_print_##sname,  \
+               .raw_init               = init_syscall_trace,           \
                .show_format            = syscall_exit_format,          \
                .define_fields          = syscall_exit_define_fields,   \
                .regfunc                = reg_event_syscall_exit,       \
                .unregfunc              = unreg_event_syscall_exit,     \
-               .data                   = "sys"#sname,                  \
+               .data                   = (void *)&__syscall_meta_##sname,\
                TRACE_SYS_EXIT_PROFILE_INIT(sname)                      \
        }
 
index 2aac8a83e89b9a7994b7570ffb3ea879d763ee23..f59604ed0ec606c75449d6b2cd8416abf6f7f38f 100644 (file)
@@ -280,6 +280,12 @@ static inline void tracepoint_synchronize_unregister(void)
  * TRACE_EVENT_FN to perform any (un)registration work.
  */
 
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
+#define DEFINE_EVENT(template, name, proto, args)              \
+       DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+
 #define TRACE_EVENT(name, proto, args, struct, assign, print)  \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,              \
index 2a4b3bf740336b23418ae972745f8704a2f420fb..5acfb1eb4df91cd096da3ee728bd636955247b0f 100644 (file)
                assign, print, reg, unreg)                      \
        DEFINE_TRACE_FN(name, reg, unreg)
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args) \
+       DEFINE_TRACE(name)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_TRACE(name)
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)       \
        DEFINE_TRACE(name)
@@ -63,6 +71,9 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef DECLARE_EVENT_CLASS
+#undef DEFINE_EVENT
+#undef DEFINE_EVENT_PRINT
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
index 00405b5f624a2d742a1d95d2a6fb59d530b6e369..5fb72733331e4e8a16d0144ad97097b174de80a8 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/blkdev.h>
 #include <linux/tracepoint.h>
 
-TRACE_EVENT(block_rq_abort,
+DECLARE_EVENT_CLASS(block_rq_with_error,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
 
@@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort,
                  __entry->nr_sector, __entry->errors)
 );
 
-TRACE_EVENT(block_rq_insert,
+DEFINE_EVENT(block_rq_with_error, block_rq_abort,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
 
-       TP_ARGS(q, rq),
+       TP_ARGS(q, rq)
+);
 
-       TP_STRUCT__entry(
-               __field(  dev_t,        dev                     )
-               __field(  sector_t,     sector                  )
-               __field(  unsigned int, nr_sector               )
-               __field(  unsigned int, bytes                   )
-               __array(  char,         rwbs,   6               )
-               __array(  char,         comm,   TASK_COMM_LEN   )
-               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
-       ),
+DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
 
-       TP_fast_assign(
-               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-               __entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-               __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-               __entry->bytes     = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0;
+       TP_PROTO(struct request_queue *q, struct request *rq),
 
-               blk_fill_rwbs_rq(__entry->rwbs, rq);
-               blk_dump_cmd(__get_str(cmd), rq);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-       ),
+       TP_ARGS(q, rq)
+);
 
-       TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs, __entry->bytes, __get_str(cmd),
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm)
+DEFINE_EVENT(block_rq_with_error, block_rq_complete,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
 );
 
-TRACE_EVENT(block_rq_issue,
+DECLARE_EVENT_CLASS(block_rq,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
 
@@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue,
                __field(  unsigned int, nr_sector               )
                __field(  unsigned int, bytes                   )
                __array(  char,         rwbs,   6               )
-               __array(  char,         comm,   TASK_COMM_LEN   )
+               __array(  char,         comm,   TASK_COMM_LEN   )
                __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
        ),
 
@@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue,
                  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_rq_requeue,
+DEFINE_EVENT(block_rq, block_rq_insert,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
 
-       TP_ARGS(q, rq),
-
-       TP_STRUCT__entry(
-               __field(  dev_t,        dev                     )
-               __field(  sector_t,     sector                  )
-               __field(  unsigned int, nr_sector               )
-               __field(  int,          errors                  )
-               __array(  char,         rwbs,   6               )
-               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
-       ),
-
-       TP_fast_assign(
-               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-               __entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-               __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-               __entry->errors    = rq->errors;
-
-               blk_fill_rwbs_rq(__entry->rwbs, rq);
-               blk_dump_cmd(__get_str(cmd), rq);
-       ),
-
-       TP_printk("%d,%d %s (%s) %llu + %u [%d]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs, __get_str(cmd),
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->errors)
+       TP_ARGS(q, rq)
 );
 
-TRACE_EVENT(block_rq_complete,
+DEFINE_EVENT(block_rq, block_rq_issue,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
 
-       TP_ARGS(q, rq),
-
-       TP_STRUCT__entry(
-               __field(  dev_t,        dev                     )
-               __field(  sector_t,     sector                  )
-               __field(  unsigned int, nr_sector               )
-               __field(  int,          errors                  )
-               __array(  char,         rwbs,   6               )
-               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
-       ),
-
-       TP_fast_assign(
-               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-               __entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-               __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-               __entry->errors    = rq->errors;
-
-               blk_fill_rwbs_rq(__entry->rwbs, rq);
-               blk_dump_cmd(__get_str(cmd), rq);
-       ),
-
-       TP_printk("%d,%d %s (%s) %llu + %u [%d]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs, __get_str(cmd),
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->errors)
+       TP_ARGS(q, rq)
 );
 
 TRACE_EVENT(block_bio_bounce,
@@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete,
                  __entry->nr_sector, __entry->error)
 );
 
-TRACE_EVENT(block_bio_backmerge,
+DECLARE_EVENT_CLASS(block_bio,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
 
@@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge,
                  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_bio_frontmerge,
+DEFINE_EVENT(block_bio, block_bio_backmerge,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
 
-       TP_ARGS(q, bio),
-
-       TP_STRUCT__entry(
-               __field( dev_t,         dev                     )
-               __field( sector_t,      sector                  )
-               __field( unsigned,      nr_sector               )
-               __array( char,          rwbs,   6               )
-               __array( char,          comm,   TASK_COMM_LEN   )
-       ),
-
-       TP_fast_assign(
-               __entry->dev            = bio->bi_bdev->bd_dev;
-               __entry->sector         = bio->bi_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-       ),
-
-       TP_printk("%d,%d %s %llu + %u [%s]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm)
+       TP_ARGS(q, bio)
 );
 
-TRACE_EVENT(block_bio_queue,
+DEFINE_EVENT(block_bio, block_bio_frontmerge,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
 
-       TP_ARGS(q, bio),
+       TP_ARGS(q, bio)
+);
 
-       TP_STRUCT__entry(
-               __field( dev_t,         dev                     )
-               __field( sector_t,      sector                  )
-               __field( unsigned int,  nr_sector               )
-               __array( char,          rwbs,   6               )
-               __array( char,          comm,   TASK_COMM_LEN   )
-       ),
+DEFINE_EVENT(block_bio, block_bio_queue,
 
-       TP_fast_assign(
-               __entry->dev            = bio->bi_bdev->bd_dev;
-               __entry->sector         = bio->bi_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-       ),
+       TP_PROTO(struct request_queue *q, struct bio *bio),
 
-       TP_printk("%d,%d %s %llu + %u [%s]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm)
+       TP_ARGS(q, bio)
 );
 
-TRACE_EVENT(block_getrq,
+DECLARE_EVENT_CLASS(block_get_rq,
 
        TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
@@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq,
                  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_sleeprq,
+DEFINE_EVENT(block_get_rq, block_getrq,
 
        TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
-       TP_ARGS(q, bio, rw),
+       TP_ARGS(q, bio, rw)
+);
 
-       TP_STRUCT__entry(
-               __field( dev_t,         dev                     )
-               __field( sector_t,      sector                  )
-               __field( unsigned int,  nr_sector               )
-               __array( char,          rwbs,   6               )
-               __array( char,          comm,   TASK_COMM_LEN   )
-       ),
+DEFINE_EVENT(block_get_rq, block_sleeprq,
 
-       TP_fast_assign(
-               __entry->dev            = bio ? bio->bi_bdev->bd_dev : 0;
-               __entry->sector         = bio ? bio->bi_sector : 0;
-               __entry->nr_sector      = bio ? bio->bi_size >> 9 : 0;
-               blk_fill_rwbs(__entry->rwbs,
-                           bio ? bio->bi_rw : 0, __entry->nr_sector);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-       ),
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
-       TP_printk("%d,%d %s %llu + %u [%s]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm)
+       TP_ARGS(q, bio, rw)
 );
 
 TRACE_EVENT(block_plug,
@@ -387,7 +274,7 @@ TRACE_EVENT(block_plug,
        TP_printk("[%s]", __entry->comm)
 );
 
-TRACE_EVENT(block_unplug_timer,
+DECLARE_EVENT_CLASS(block_unplug,
 
        TP_PROTO(struct request_queue *q),
 
@@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer,
        TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
 );
 
-TRACE_EVENT(block_unplug_io,
+DEFINE_EVENT(block_unplug, block_unplug_timer,
 
        TP_PROTO(struct request_queue *q),
 
-       TP_ARGS(q),
+       TP_ARGS(q)
+);
 
-       TP_STRUCT__entry(
-               __field( int,           nr_rq                   )
-               __array( char,          comm,   TASK_COMM_LEN   )
-       ),
+DEFINE_EVENT(block_unplug, block_unplug_io,
 
-       TP_fast_assign(
-               __entry->nr_rq  = q->rq.count[READ] + q->rq.count[WRITE];
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-       ),
+       TP_PROTO(struct request_queue *q),
 
-       TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+       TP_ARGS(q)
 );
 
 TRACE_EVENT(block_split,
index d09550bf3f951ec4a9230f9156a5fd29190b9e13..318f76535bd44c888d2a1045f0f4a520d3c5799a 100644 (file)
@@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode,
                  (unsigned long) __entry->dir, __entry->mode)
 );
 
-TRACE_EVENT(ext4_write_begin,
+DECLARE_EVENT_CLASS(ext4__write_begin,
 
        TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
                 unsigned int flags),
@@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin,
                  __entry->pos, __entry->len, __entry->flags)
 );
 
-TRACE_EVENT(ext4_ordered_write_end,
+DEFINE_EVENT(ext4__write_begin, ext4_write_begin,
+
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int flags),
+
+       TP_ARGS(inode, pos, len, flags)
+);
+
+DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin,
+
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int flags),
+
+       TP_ARGS(inode, pos, len, flags)
+);
+
+DECLARE_EVENT_CLASS(ext4__write_end,
        TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
                        unsigned int copied),
 
@@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end,
                  __entry->pos, __entry->len, __entry->copied)
 );
 
-TRACE_EVENT(ext4_writeback_write_end,
+DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
+
        TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
                 unsigned int copied),
 
-       TP_ARGS(inode, pos, len, copied),
+       TP_ARGS(inode, pos, len, copied)
+);
 
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        ino_t,  ino                     )
-               __field(        loff_t, pos                     )
-               __field(        unsigned int, len               )
-               __field(        unsigned int, copied            )
-       ),
+DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end,
 
-       TP_fast_assign(
-               __entry->dev    = inode->i_sb->s_dev;
-               __entry->ino    = inode->i_ino;
-               __entry->pos    = pos;
-               __entry->len    = len;
-               __entry->copied = copied;
-       ),
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int copied),
 
-       TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->pos, __entry->len, __entry->copied)
+       TP_ARGS(inode, pos, len, copied)
 );
 
-TRACE_EVENT(ext4_journalled_write_end,
+DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end,
+
        TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
                 unsigned int copied),
-       TP_ARGS(inode, pos, len, copied),
 
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        ino_t,  ino                     )
-               __field(        loff_t, pos                     )
-               __field(        unsigned int, len               )
-               __field(        unsigned int, copied            )
-       ),
+       TP_ARGS(inode, pos, len, copied)
+);
 
-       TP_fast_assign(
-               __entry->dev    = inode->i_sb->s_dev;
-               __entry->ino    = inode->i_ino;
-               __entry->pos    = pos;
-               __entry->len    = len;
-               __entry->copied = copied;
-       ),
+DEFINE_EVENT(ext4__write_end, ext4_da_write_end,
 
-       TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->pos, __entry->len, __entry->copied)
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied)
 );
 
 TRACE_EVENT(ext4_writepage,
@@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result,
                  (unsigned long) __entry->writeback_index)
 );
 
-TRACE_EVENT(ext4_da_write_begin,
-       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
-                       unsigned int flags),
-
-       TP_ARGS(inode, pos, len, flags),
-
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        ino_t,  ino                     )
-               __field(        loff_t, pos                     )
-               __field(        unsigned int, len               )
-               __field(        unsigned int, flags             )
-       ),
-
-       TP_fast_assign(
-               __entry->dev    = inode->i_sb->s_dev;
-               __entry->ino    = inode->i_ino;
-               __entry->pos    = pos;
-               __entry->len    = len;
-               __entry->flags  = flags;
-       ),
-
-       TP_printk("dev %s ino %lu pos %llu len %u flags %u",
-                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->pos, __entry->len, __entry->flags)
-);
-
-TRACE_EVENT(ext4_da_write_end,
-       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
-                       unsigned int copied),
-
-       TP_ARGS(inode, pos, len, copied),
-
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        ino_t,  ino                     )
-               __field(        loff_t, pos                     )
-               __field(        unsigned int, len               )
-               __field(        unsigned int, copied            )
-       ),
-
-       TP_fast_assign(
-               __entry->dev    = inode->i_sb->s_dev;
-               __entry->ino    = inode->i_ino;
-               __entry->pos    = pos;
-               __entry->len    = len;
-               __entry->copied = copied;
-       ),
-
-       TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->pos, __entry->len, __entry->copied)
-);
-
 TRACE_EVENT(ext4_discard_blocks,
        TP_PROTO(struct super_block *sb, unsigned long long blk,
                        unsigned long long count),
index dcfcd44076232700f14d5a6ff68c3f5e7b8e1ef2..0e4cfb694fe70630457af67e1b1bc568f56c9b09 100644 (file)
@@ -82,18 +82,7 @@ TRACE_EVENT(irq_handler_exit,
                  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
-/**
- * softirq_entry - called immediately before the softirq handler
- * @h: pointer to struct softirq_action
- * @vec: pointer to first struct softirq_action in softirq_vec array
- *
- * The @h parameter, contains a pointer to the struct softirq_action
- * which has a pointer to the action handler that is called. By subtracting
- * the @vec pointer from the @h pointer, we can determine the softirq
- * number. Also, when used in combination with the softirq_exit tracepoint
- * we can determine the softirq latency.
- */
-TRACE_EVENT(softirq_entry,
+DECLARE_EVENT_CLASS(softirq,
 
        TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
 
@@ -111,6 +100,24 @@ TRACE_EVENT(softirq_entry,
                  show_softirq_name(__entry->vec))
 );
 
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter, contains a pointer to the struct softirq_action
+ * which has a pointer to the action handler that is called. By subtracting
+ * the @vec pointer from the @h pointer, we can determine the softirq
+ * number. Also, when used in combination with the softirq_exit tracepoint
+ * we can determine the softirq latency.
+ */
+DEFINE_EVENT(softirq, softirq_entry,
+
+       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+       TP_ARGS(h, vec)
+);
+
 /**
  * softirq_exit - called immediately after the softirq handler returns
  * @h: pointer to struct softirq_action
@@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry,
  * combination with the softirq_entry tracepoint we can determine the softirq
  * latency.
  */
-TRACE_EVENT(softirq_exit,
+DEFINE_EVENT(softirq, softirq_exit,
 
        TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
 
-       TP_ARGS(h, vec),
-
-       TP_STRUCT__entry(
-               __field(        int,    vec                     )
-       ),
-
-       TP_fast_assign(
-               __entry->vec = (int)(h - vec);
-       ),
-
-       TP_printk("vec=%d [action=%s]", __entry->vec,
-                 show_softirq_name(__entry->vec))
+       TP_ARGS(h, vec)
 );
 
 #endif /*  _TRACE_IRQ_H */
index 3c60b75adb9e226de91d34cb9c635fbc999a9fa6..96b370a050deb27ca6fe8393990e52734dbbff09 100644 (file)
@@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint,
                  jbd2_dev_to_name(__entry->dev), __entry->result)
 );
 
-TRACE_EVENT(jbd2_start_commit,
+DECLARE_EVENT_CLASS(jbd2_commit,
 
        TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
@@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit,
                  __entry->sync_commit)
 );
 
-TRACE_EVENT(jbd2_commit_locking,
+DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
 
        TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-       TP_ARGS(journal, commit_transaction),
-
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        char,   sync_commit               )
-               __field(        int,    transaction               )
-       ),
-
-       TP_fast_assign(
-               __entry->dev            = journal->j_fs_dev->bd_dev;
-               __entry->sync_commit = commit_transaction->t_synchronous_commit;
-               __entry->transaction    = commit_transaction->t_tid;
-       ),
-
-       TP_printk("dev %s transaction %d sync %d",
-                 jbd2_dev_to_name(__entry->dev), __entry->transaction,
-                 __entry->sync_commit)
+       TP_ARGS(journal, commit_transaction)
 );
 
-TRACE_EVENT(jbd2_commit_flushing,
+DEFINE_EVENT(jbd2_commit, jbd2_commit_locking,
 
        TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-       TP_ARGS(journal, commit_transaction),
-
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        char,   sync_commit               )
-               __field(        int,    transaction               )
-       ),
-
-       TP_fast_assign(
-               __entry->dev            = journal->j_fs_dev->bd_dev;
-               __entry->sync_commit = commit_transaction->t_synchronous_commit;
-               __entry->transaction    = commit_transaction->t_tid;
-       ),
-
-       TP_printk("dev %s transaction %d sync %d",
-                 jbd2_dev_to_name(__entry->dev), __entry->transaction,
-                 __entry->sync_commit)
+       TP_ARGS(journal, commit_transaction)
 );
 
-TRACE_EVENT(jbd2_commit_logging,
+DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing,
 
        TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-       TP_ARGS(journal, commit_transaction),
+       TP_ARGS(journal, commit_transaction)
+);
 
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        char,   sync_commit               )
-               __field(        int,    transaction               )
-       ),
+DEFINE_EVENT(jbd2_commit, jbd2_commit_logging,
 
-       TP_fast_assign(
-               __entry->dev            = journal->j_fs_dev->bd_dev;
-               __entry->sync_commit = commit_transaction->t_synchronous_commit;
-               __entry->transaction    = commit_transaction->t_tid;
-       ),
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-       TP_printk("dev %s transaction %d sync %d",
-                 jbd2_dev_to_name(__entry->dev), __entry->transaction,
-                 __entry->sync_commit)
+       TP_ARGS(journal, commit_transaction)
 );
 
 TRACE_EVENT(jbd2_end_commit,
index eaf46bdd18a5f81719b21de90fba91d5e64e78ea..3adca0ca9dbee10479d34d5a3e3562609ef89e86 100644 (file)
@@ -44,7 +44,7 @@
        {(unsigned long)__GFP_MOVABLE,          "GFP_MOVABLE"}          \
        ) : "GFP_NOWAIT"
 
-TRACE_EVENT(kmalloc,
+DECLARE_EVENT_CLASS(kmem_alloc,
 
        TP_PROTO(unsigned long call_site,
                 const void *ptr,
@@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc,
                show_gfp_flags(__entry->gfp_flags))
 );
 
-TRACE_EVENT(kmem_cache_alloc,
+DEFINE_EVENT(kmem_alloc, kmalloc,
 
-       TP_PROTO(unsigned long call_site,
-                const void *ptr,
-                size_t bytes_req,
-                size_t bytes_alloc,
-                gfp_t gfp_flags),
+       TP_PROTO(unsigned long call_site, const void *ptr,
+                size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+);
 
-       TP_STRUCT__entry(
-               __field(        unsigned long,  call_site       )
-               __field(        const void *,   ptr             )
-               __field(        size_t,         bytes_req       )
-               __field(        size_t,         bytes_alloc     )
-               __field(        gfp_t,          gfp_flags       )
-       ),
+DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
 
-       TP_fast_assign(
-               __entry->call_site      = call_site;
-               __entry->ptr            = ptr;
-               __entry->bytes_req      = bytes_req;
-               __entry->bytes_alloc    = bytes_alloc;
-               __entry->gfp_flags      = gfp_flags;
-       ),
+       TP_PROTO(unsigned long call_site, const void *ptr,
+                size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-       TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
-               __entry->call_site,
-               __entry->ptr,
-               __entry->bytes_req,
-               __entry->bytes_alloc,
-               show_gfp_flags(__entry->gfp_flags))
+       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
 );
 
-TRACE_EVENT(kmalloc_node,
+DECLARE_EVENT_CLASS(kmem_alloc_node,
 
        TP_PROTO(unsigned long call_site,
                 const void *ptr,
@@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node,
                __entry->node)
 );
 
-TRACE_EVENT(kmem_cache_alloc_node,
+DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
-       TP_PROTO(unsigned long call_site,
-                const void *ptr,
-                size_t bytes_req,
-                size_t bytes_alloc,
-                gfp_t gfp_flags,
-                int node),
+       TP_PROTO(unsigned long call_site, const void *ptr,
+                size_t bytes_req, size_t bytes_alloc,
+                gfp_t gfp_flags, int node),
 
-       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+);
 
-       TP_STRUCT__entry(
-               __field(        unsigned long,  call_site       )
-               __field(        const void *,   ptr             )
-               __field(        size_t,         bytes_req       )
-               __field(        size_t,         bytes_alloc     )
-               __field(        gfp_t,          gfp_flags       )
-               __field(        int,            node            )
-       ),
+DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
 
-       TP_fast_assign(
-               __entry->call_site      = call_site;
-               __entry->ptr            = ptr;
-               __entry->bytes_req      = bytes_req;
-               __entry->bytes_alloc    = bytes_alloc;
-               __entry->gfp_flags      = gfp_flags;
-               __entry->node           = node;
-       ),
+       TP_PROTO(unsigned long call_site, const void *ptr,
+                size_t bytes_req, size_t bytes_alloc,
+                gfp_t gfp_flags, int node),
 
-       TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
-               __entry->call_site,
-               __entry->ptr,
-               __entry->bytes_req,
-               __entry->bytes_alloc,
-               show_gfp_flags(__entry->gfp_flags),
-               __entry->node)
+       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
-TRACE_EVENT(kfree,
+DECLARE_EVENT_CLASS(kmem_free,
 
        TP_PROTO(unsigned long call_site, const void *ptr),
 
@@ -207,23 +169,18 @@ TRACE_EVENT(kfree,
        TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
 );
 
-TRACE_EVENT(kmem_cache_free,
+DEFINE_EVENT(kmem_free, kfree,
 
        TP_PROTO(unsigned long call_site, const void *ptr),
 
-       TP_ARGS(call_site, ptr),
+       TP_ARGS(call_site, ptr)
+);
 
-       TP_STRUCT__entry(
-               __field(        unsigned long,  call_site       )
-               __field(        const void *,   ptr             )
-       ),
+DEFINE_EVENT(kmem_free, kmem_cache_free,
 
-       TP_fast_assign(
-               __entry->call_site      = call_site;
-               __entry->ptr            = ptr;
-       ),
+       TP_PROTO(unsigned long call_site, const void *ptr),
 
-       TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+       TP_ARGS(call_site, ptr)
 );
 
 TRACE_EVENT(mm_page_free_direct,
@@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc,
                show_gfp_flags(__entry->gfp_flags))
 );
 
-TRACE_EVENT(mm_page_alloc_zone_locked,
+DECLARE_EVENT_CLASS(mm_page,
 
        TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
@@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked,
                __entry->order == 0)
 );
 
-TRACE_EVENT(mm_page_pcpu_drain,
+DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked,
 
-       TP_PROTO(struct page *page, int order, int migratetype),
+       TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
-       TP_ARGS(page, order, migratetype),
+       TP_ARGS(page, order, migratetype)
+);
 
-       TP_STRUCT__entry(
-               __field(        struct page *,  page            )
-               __field(        int,            order           )
-               __field(        int,            migratetype     )
-       ),
+DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain,
 
-       TP_fast_assign(
-               __entry->page           = page;
-               __entry->order          = order;
-               __entry->migratetype    = migratetype;
-       ),
+       TP_PROTO(struct page *page, unsigned int order, int migratetype),
+
+       TP_ARGS(page, order, migratetype),
 
        TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
-               __entry->page,
-               page_to_pfn(__entry->page),
-               __entry->order,
-               __entry->migratetype)
+               __entry->page, page_to_pfn(__entry->page),
+               __entry->order, __entry->migratetype)
 );
 
 TRACE_EVENT(mm_page_alloc_extfrag,
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
new file mode 100644 (file)
index 0000000..7eee778
--- /dev/null
@@ -0,0 +1,69 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mce
+
+#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MCE_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+#include <asm/mce.h>
+
+TRACE_EVENT(mce_record,
+
+       TP_PROTO(struct mce *m),
+
+       TP_ARGS(m),
+
+       TP_STRUCT__entry(
+               __field(        u64,            mcgcap          )
+               __field(        u64,            mcgstatus       )
+               __field(        u8,             bank            )
+               __field(        u64,            status          )
+               __field(        u64,            addr            )
+               __field(        u64,            misc            )
+               __field(        u64,            ip              )
+               __field(        u8,             cs              )
+               __field(        u64,            tsc             )
+               __field(        u64,            walltime        )
+               __field(        u32,            cpu             )
+               __field(        u32,            cpuid           )
+               __field(        u32,            apicid          )
+               __field(        u32,            socketid        )
+               __field(        u8,             cpuvendor       )
+       ),
+
+       TP_fast_assign(
+               __entry->mcgcap         = m->mcgcap;
+               __entry->mcgstatus      = m->mcgstatus;
+               __entry->bank           = m->bank;
+               __entry->status         = m->status;
+               __entry->addr           = m->addr;
+               __entry->misc           = m->misc;
+               __entry->ip             = m->ip;
+               __entry->cs             = m->cs;
+               __entry->tsc            = m->tsc;
+               __entry->walltime       = m->time;
+               __entry->cpu            = m->extcpu;
+               __entry->cpuid          = m->cpuid;
+               __entry->apicid         = m->apicid;
+               __entry->socketid       = m->socketid;
+               __entry->cpuvendor      = m->cpuvendor;
+       ),
+
+       TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+               __entry->cpu,
+               __entry->mcgcap, __entry->mcgstatus,
+               __entry->bank, __entry->status,
+               __entry->addr, __entry->misc,
+               __entry->cs, __entry->ip,
+               __entry->tsc,
+               __entry->cpuvendor, __entry->cpuid,
+               __entry->walltime,
+               __entry->socketid,
+               __entry->apicid)
+);
+
+#endif /* _TRACE_MCE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index 84160fb18478f99da3ae79bc19bc4da6044d8978..4b0f48ba16a688da9ead5b901604419c7823ea3b 100644 (file)
@@ -51,7 +51,7 @@ TRACE_EVENT(module_free,
        TP_printk("%s", __get_str(name))
 );
 
-TRACE_EVENT(module_get,
+DECLARE_EVENT_CLASS(module_refcnt,
 
        TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
@@ -73,26 +73,18 @@ TRACE_EVENT(module_get,
                  __get_str(name), (void *)__entry->ip, __entry->refcnt)
 );
 
-TRACE_EVENT(module_put,
+DEFINE_EVENT(module_refcnt, module_get,
 
        TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
-       TP_ARGS(mod, ip, refcnt),
+       TP_ARGS(mod, ip, refcnt)
+);
 
-       TP_STRUCT__entry(
-               __field(        unsigned long,  ip              )
-               __field(        int,            refcnt          )
-               __string(       name,           mod->name       )
-       ),
+DEFINE_EVENT(module_refcnt, module_put,
 
-       TP_fast_assign(
-               __entry->ip     = ip;
-               __entry->refcnt = refcnt;
-               __assign_str(name, mod->name);
-       ),
+       TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
-       TP_printk("%s call_site=%pf refcnt=%d",
-                 __get_str(name), (void *)__entry->ip, __entry->refcnt)
+       TP_ARGS(mod, ip, refcnt)
 );
 
 TRACE_EVENT(module_request,
index 9bb96e5a284809fa21b6ca99fc73067094bdf855..c4efe9b8280d4f9c4261c24de5f80010f1f445ce 100644 (file)
@@ -16,7 +16,7 @@ enum {
 };
 #endif
 
-TRACE_EVENT(power_start,
+DECLARE_EVENT_CLASS(power,
 
        TP_PROTO(unsigned int type, unsigned int state),
 
@@ -35,42 +35,36 @@ TRACE_EVENT(power_start,
        TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
 );
 
-TRACE_EVENT(power_end,
-
-       TP_PROTO(int dummy),
+DEFINE_EVENT(power, power_start,
 
-       TP_ARGS(dummy),
+       TP_PROTO(unsigned int type, unsigned int state),
 
-       TP_STRUCT__entry(
-               __field(        u64,            dummy           )
-       ),
+       TP_ARGS(type, state)
+);
 
-       TP_fast_assign(
-               __entry->dummy = 0xffff;
-       ),
+DEFINE_EVENT(power, power_frequency,
 
-       TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
+       TP_PROTO(unsigned int type, unsigned int state),
 
+       TP_ARGS(type, state)
 );
 
+TRACE_EVENT(power_end,
 
-TRACE_EVENT(power_frequency,
-
-       TP_PROTO(unsigned int type, unsigned int state),
+       TP_PROTO(int dummy),
 
-       TP_ARGS(type, state),
+       TP_ARGS(dummy),
 
        TP_STRUCT__entry(
-               __field(        u64,            type            )
-               __field(        u64,            state           )
+               __field(        u64,            dummy           )
        ),
 
        TP_fast_assign(
-               __entry->type = type;
-               __entry->state = state;
+               __entry->dummy = 0xffff;
        ),
 
-       TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state)
+       TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
+
 );
 
 #endif /* _TRACE_POWER_H */
index b50b9856c59f2c3196eebf12d21a21db276db2c5..cfceb0b73e205bb936a6e3fcfeb34f5515ab0feb 100644 (file)
@@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task,
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
-TRACE_EVENT(sched_wakeup,
+DECLARE_EVENT_CLASS(sched_wakeup_template,
 
        TP_PROTO(struct rq *rq, struct task_struct *p, int success),
 
@@ -110,38 +110,19 @@ TRACE_EVENT(sched_wakeup,
                  __entry->success, __entry->target_cpu)
 );
 
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+            TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+            TP_ARGS(rq, p, success));
+
 /*
  * Tracepoint for waking up a new task:
  *
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
-TRACE_EVENT(sched_wakeup_new,
-
-       TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-       TP_ARGS(rq, p, success),
-
-       TP_STRUCT__entry(
-               __array(        char,   comm,   TASK_COMM_LEN   )
-               __field(        pid_t,  pid                     )
-               __field(        int,    prio                    )
-               __field(        int,    success                 )
-               __field(        int,    target_cpu              )
-       ),
-
-       TP_fast_assign(
-               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-               __entry->pid            = p->pid;
-               __entry->prio           = p->prio;
-               __entry->success        = success;
-               __entry->target_cpu     = task_cpu(p);
-       ),
-
-       TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
-                 __entry->comm, __entry->pid, __entry->prio,
-                 __entry->success, __entry->target_cpu)
-);
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+            TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+            TP_ARGS(rq, p, success));
 
 /*
  * Tracepoint for task switches, performed by the scheduler:
@@ -216,10 +197,7 @@ TRACE_EVENT(sched_migrate_task,
                  __entry->orig_cpu, __entry->dest_cpu)
 );
 
-/*
- * Tracepoint for freeing a task:
- */
-TRACE_EVENT(sched_process_free,
+DECLARE_EVENT_CLASS(sched_process_template,
 
        TP_PROTO(struct task_struct *p),
 
@@ -242,29 +220,19 @@ TRACE_EVENT(sched_process_free,
 );
 
 /*
- * Tracepoint for a task exiting:
+ * Tracepoint for freeing a task:
  */
-TRACE_EVENT(sched_process_exit,
+DEFINE_EVENT(sched_process_template, sched_process_free,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
+            
 
-       TP_PROTO(struct task_struct *p),
-
-       TP_ARGS(p),
-
-       TP_STRUCT__entry(
-               __array(        char,   comm,   TASK_COMM_LEN   )
-               __field(        pid_t,  pid                     )
-               __field(        int,    prio                    )
-       ),
-
-       TP_fast_assign(
-               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-               __entry->pid            = p->pid;
-               __entry->prio           = p->prio;
-       ),
-
-       TP_printk("comm=%s pid=%d prio=%d",
-                 __entry->comm, __entry->pid, __entry->prio)
-);
+/*
+ * Tracepoint for a task exiting:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_exit,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
 
 /*
  * Tracepoint for a waiting task:
@@ -319,41 +287,11 @@ TRACE_EVENT(sched_process_fork,
                __entry->child_comm, __entry->child_pid)
 );
 
-/*
- * Tracepoint for sending a signal:
- */
-TRACE_EVENT(sched_signal_send,
-
-       TP_PROTO(int sig, struct task_struct *p),
-
-       TP_ARGS(sig, p),
-
-       TP_STRUCT__entry(
-               __field(        int,    sig                     )
-               __array(        char,   comm,   TASK_COMM_LEN   )
-               __field(        pid_t,  pid                     )
-       ),
-
-       TP_fast_assign(
-               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-               __entry->pid    = p->pid;
-               __entry->sig    = sig;
-       ),
-
-       TP_printk("sig=%d comm=%s pid=%d",
-                 __entry->sig, __entry->comm, __entry->pid)
-);
-
 /*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
  */
-
-/*
- * Tracepoint for accounting wait time (time the task is runnable
- * but not actually running due to scheduler contention).
- */
-TRACE_EVENT(sched_stat_wait,
+DECLARE_EVENT_CLASS(sched_stat_template,
 
        TP_PROTO(struct task_struct *tsk, u64 delay),
 
@@ -379,6 +317,31 @@ TRACE_EVENT(sched_stat_wait,
                        (unsigned long long)__entry->delay)
 );
 
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
 /*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
@@ -412,66 +375,6 @@ TRACE_EVENT(sched_stat_runtime,
                        (unsigned long long)__entry->vruntime)
 );
 
-/*
- * Tracepoint for accounting sleep time (time the task is not runnable,
- * including iowait, see below).
- */
-TRACE_EVENT(sched_stat_sleep,
-
-       TP_PROTO(struct task_struct *tsk, u64 delay),
-
-       TP_ARGS(tsk, delay),
-
-       TP_STRUCT__entry(
-               __array( char,  comm,   TASK_COMM_LEN   )
-               __field( pid_t, pid                     )
-               __field( u64,   delay                   )
-       ),
-
-       TP_fast_assign(
-               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-               __entry->pid    = tsk->pid;
-               __entry->delay  = delay;
-       )
-       TP_perf_assign(
-               __perf_count(delay);
-       ),
-
-       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
-                       __entry->comm, __entry->pid,
-                       (unsigned long long)__entry->delay)
-);
-
-/*
- * Tracepoint for accounting iowait time (time the task is not runnable
- * due to waiting on IO to complete).
- */
-TRACE_EVENT(sched_stat_iowait,
-
-       TP_PROTO(struct task_struct *tsk, u64 delay),
-
-       TP_ARGS(tsk, delay),
-
-       TP_STRUCT__entry(
-               __array( char,  comm,   TASK_COMM_LEN   )
-               __field( pid_t, pid                     )
-               __field( u64,   delay                   )
-       ),
-
-       TP_fast_assign(
-               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-               __entry->pid    = tsk->pid;
-               __entry->delay  = delay;
-       )
-       TP_perf_assign(
-               __perf_count(delay);
-       ),
-
-       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
-                       __entry->comm, __entry->pid,
-                       (unsigned long long)__entry->delay)
-);
-
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
new file mode 100644 (file)
index 0000000..a510b75
--- /dev/null
@@ -0,0 +1,173 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM signal
+
+#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SIGNAL_H
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#define TP_STORE_SIGINFO(__entry, info)                                \
+       do {                                                    \
+               if (info == SEND_SIG_NOINFO) {                  \
+                       __entry->errno  = 0;                    \
+                       __entry->code   = SI_USER;              \
+               } else if (info == SEND_SIG_PRIV) {             \
+                       __entry->errno  = 0;                    \
+                       __entry->code   = SI_KERNEL;            \
+               } else {                                        \
+                       __entry->errno  = info->si_errno;       \
+                       __entry->code   = info->si_code;        \
+               }                                               \
+       } while (0)
+
+/**
+ * signal_generate - called when a signal is generated
+ * @sig: signal number
+ * @info: pointer to struct siginfo
+ * @task: pointer to struct task_struct
+ *
+ * Current process sends a 'sig' signal to 'task' process with
+ * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
+ * 'info' is not a real pointer and its fields can't be accessed. Instead,
+ * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV
+ * means that si_code is SI_KERNEL.
+ */
+TRACE_EVENT(signal_generate,
+
+       TP_PROTO(int sig, struct siginfo *info, struct task_struct *task),
+
+       TP_ARGS(sig, info, task),
+
+       TP_STRUCT__entry(
+               __field(        int,    sig                     )
+               __field(        int,    errno                   )
+               __field(        int,    code                    )
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+       ),
+
+       TP_fast_assign(
+               __entry->sig    = sig;
+               TP_STORE_SIGINFO(__entry, info);
+               memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+               __entry->pid    = task->pid;
+       ),
+
+       TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d",
+                 __entry->sig, __entry->errno, __entry->code,
+                 __entry->comm, __entry->pid)
+);
+
+/**
+ * signal_deliver - called when a signal is delivered
+ * @sig: signal number
+ * @info: pointer to struct siginfo
+ * @ka: pointer to struct k_sigaction
+ *
+ * A 'sig' signal is delivered to current process with 'info' siginfo,
+ * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or
+ * SIG_DFL.
+ * Note that some signals reported by signal_generate tracepoint can be
+ * lost, ignored or modified (by debugger) before hitting this tracepoint.
+ * This means it can show which signals are actually delivered, but
+ * matching generated signals to delivered signals may not be accurate.
+ */
+TRACE_EVENT(signal_deliver,
+
+       TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka),
+
+       TP_ARGS(sig, info, ka),
+
+       TP_STRUCT__entry(
+               __field(        int,            sig             )
+               __field(        int,            errno           )
+               __field(        int,            code            )
+               __field(        unsigned long,  sa_handler      )
+               __field(        unsigned long,  sa_flags        )
+       ),
+
+       TP_fast_assign(
+               __entry->sig    = sig;
+               TP_STORE_SIGINFO(__entry, info);
+               __entry->sa_handler     = (unsigned long)ka->sa.sa_handler;
+               __entry->sa_flags       = ka->sa.sa_flags;
+       ),
+
+       TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx",
+                 __entry->sig, __entry->errno, __entry->code,
+                 __entry->sa_handler, __entry->sa_flags)
+);
+
+/**
+ * signal_overflow_fail - called when the signal queue overflows
+ * @sig: signal number
+ * @group: signal to process group or not (bool)
+ * @info: pointer to struct siginfo
+ *
+ * Kernel fails to generate 'sig' signal with 'info' siginfo, because
+ * the siginfo queue has overflowed, and the signal is dropped.
+ * 'group' is not 0 if the signal will be sent to a process group.
+ * 'sig' is always one of RT signals.
+ */
+TRACE_EVENT(signal_overflow_fail,
+
+       TP_PROTO(int sig, int group, struct siginfo *info),
+
+       TP_ARGS(sig, group, info),
+
+       TP_STRUCT__entry(
+               __field(        int,    sig     )
+               __field(        int,    group   )
+               __field(        int,    errno   )
+               __field(        int,    code    )
+       ),
+
+       TP_fast_assign(
+               __entry->sig    = sig;
+               __entry->group  = group;
+               TP_STORE_SIGINFO(__entry, info);
+       ),
+
+       TP_printk("sig=%d group=%d errno=%d code=%d",
+                 __entry->sig, __entry->group, __entry->errno, __entry->code)
+);
+
+/**
+ * signal_lose_info - called when siginfo is lost
+ * @sig: signal number
+ * @group: signal to process group or not (bool)
+ * @info: pointer to struct siginfo
+ *
+ * Kernel generates 'sig' signal but loses 'info' siginfo, because the
+ * siginfo queue has overflowed.
+ * 'group' is not 0 if the signal will be sent to a process group.
+ * 'sig' is always one of non-RT signals.
+ */
+TRACE_EVENT(signal_lose_info,
+
+       TP_PROTO(int sig, int group, struct siginfo *info),
+
+       TP_ARGS(sig, group, info),
+
+       TP_STRUCT__entry(
+               __field(        int,    sig     )
+               __field(        int,    group   )
+               __field(        int,    errno   )
+               __field(        int,    code    )
+       ),
+
+       TP_fast_assign(
+               __entry->sig    = sig;
+               __entry->group  = group;
+               TP_STORE_SIGINFO(__entry, info);
+       ),
+
+       TP_printk("sig=%d group=%d errno=%d code=%d",
+                 __entry->sig, __entry->group, __entry->errno, __entry->code)
+);
+#endif /* _TRACE_SIGNAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index e4612dbd7ba6070d3d2a28e8a7f4359524cbe77c..d6c974474e70272bd41182ecbbaa4a2102861a0e 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
 
-TRACE_EVENT(workqueue_insertion,
+DECLARE_EVENT_CLASS(workqueue,
 
        TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
@@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion,
                __entry->thread_pid, __entry->func)
 );
 
-TRACE_EVENT(workqueue_execution,
+DEFINE_EVENT(workqueue, workqueue_insertion,
 
        TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
-       TP_ARGS(wq_thread, work),
+       TP_ARGS(wq_thread, work)
+);
 
-       TP_STRUCT__entry(
-               __array(char,           thread_comm,    TASK_COMM_LEN)
-               __field(pid_t,          thread_pid)
-               __field(work_func_t,    func)
-       ),
+DEFINE_EVENT(workqueue, workqueue_execution,
 
-       TP_fast_assign(
-               memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
-               __entry->thread_pid     = wq_thread->pid;
-               __entry->func           = work->func;
-       ),
+       TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
-       TP_printk("thread=%s:%d func=%pf", __entry->thread_comm,
-               __entry->thread_pid, __entry->func)
+       TP_ARGS(wq_thread, work)
 );
 
 /* Trace the creation of one workqueue thread on a cpu */
index 4945d1c998645548a818a939bf1ad30b0a497734..2c9c073e45ad1662ee5f88dc80fe7aff757f3042 100644 (file)
 
 #include <linux/ftrace_event.h>
 
+/*
+ * DECLARE_EVENT_CLASS can be used to add generic function
+ * handlers for events. That is useful when several events have
+ * the same parameters and differ only in their tracepoints.
+ * Each tracepoint can be defined with DEFINE_EVENT and that
+ * will map the DECLARE_EVENT_CLASS to the tracepoint.
+ *
+ * TRACE_EVENT is a one to one mapping between tracepoint and template.
+ */
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
+       DECLARE_EVENT_CLASS(name,                              \
+                            PARAMS(proto),                    \
+                            PARAMS(args),                     \
+                            PARAMS(tstruct),                  \
+                            PARAMS(assign),                   \
+                            PARAMS(print));                   \
+       DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
+
+
 #undef __field
 #define __field(type, item)            type    item;
 
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
-       struct ftrace_raw_##name {                              \
-               struct trace_entry      ent;                    \
-               tstruct                                         \
-               char                    __data[0];              \
-       };                                                      \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
+       struct ftrace_raw_##name {                                      \
+               struct trace_entry      ent;                            \
+               tstruct                                                 \
+               char                    __data[0];                      \
+       };
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)      \
        static struct ftrace_event_call event_##name
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef __cpparg
 #define __cpparg(arg...) arg
 
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
        struct ftrace_data_offsets_##call {                             \
                tstruct;                                                \
        };
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
 #undef TP_perf_assign
 #define TP_perf_assign(args...)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)           \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)   \
 static int                                                             \
-ftrace_format_##call(struct ftrace_event_call *unused,                 \
-                     struct trace_seq *s)                              \
+ftrace_format_setup_##call(struct ftrace_event_call *unused,           \
+                          struct trace_seq *s)                         \
 {                                                                      \
        struct ftrace_raw_##call field __attribute__((unused));         \
        int ret = 0;                                                    \
                                                                        \
        tstruct;                                                        \
                                                                        \
+       return ret;                                                     \
+}                                                                      \
+                                                                       \
+static int                                                             \
+ftrace_format_##call(struct ftrace_event_call *unused,                 \
+                    struct trace_seq *s)                               \
+{                                                                      \
+       int ret = 0;                                                    \
+                                                                       \
+       ret = ftrace_format_setup_##call(unused, s);                    \
+       if (!ret)                                                       \
+               return ret;                                             \
+                                                                       \
+       ret = trace_seq_printf(s, "\nprint fmt: " print);               \
+                                                                       \
+       return ret;                                                     \
+}
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)         \
+static int                                                             \
+ftrace_format_##name(struct ftrace_event_call *unused,                 \
+                     struct trace_seq *s)                              \
+{                                                                      \
+       int ret = 0;                                                    \
+                                                                       \
+       ret = ftrace_format_setup_##template(unused, s);                \
+       if (!ret)                                                       \
+               return ret;                                             \
+                                                                       \
        trace_seq_printf(s, "\nprint fmt: " print);                     \
                                                                        \
        return ret;                                                     \
@@ -255,15 +321,57 @@ ftrace_format_##call(struct ftrace_event_call *unused,                    \
                ftrace_print_symbols_seq(p, value, symbols);            \
        })
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
 static enum print_line_t                                               \
-ftrace_raw_output_##call(struct trace_iterator *iter, int flags)       \
+ftrace_raw_output_id_##call(int event_id, const char *name,            \
+                           struct trace_iterator *iter, int flags)     \
 {                                                                      \
        struct trace_seq *s = &iter->seq;                               \
        struct ftrace_raw_##call *field;                                \
        struct trace_entry *entry;                                      \
        struct trace_seq *p;                                            \
+       int ret;                                                        \
+                                                                       \
+       entry = iter->ent;                                              \
+                                                                       \
+       if (entry->type != event_id) {                                  \
+               WARN_ON_ONCE(1);                                        \
+               return TRACE_TYPE_UNHANDLED;                            \
+       }                                                               \
+                                                                       \
+       field = (typeof(field))entry;                                   \
+                                                                       \
+       p = &get_cpu_var(ftrace_event_seq);                             \
+       trace_seq_init(p);                                              \
+       ret = trace_seq_printf(s, "%s: ", name);                        \
+       if (ret)                                                        \
+               ret = trace_seq_printf(s, print);                       \
+       put_cpu();                                                      \
+       if (!ret)                                                       \
+               return TRACE_TYPE_PARTIAL_LINE;                         \
+                                                                       \
+       return TRACE_TYPE_HANDLED;                                      \
+}
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)                      \
+static enum print_line_t                                               \
+ftrace_raw_output_##name(struct trace_iterator *iter, int flags)       \
+{                                                                      \
+       return ftrace_raw_output_id_##template(event_##name.id,         \
+                                              #name, iter, flags);     \
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, call, proto, args, print)         \
+static enum print_line_t                                               \
+ftrace_raw_output_##call(struct trace_iterator *iter, int flags)       \
+{                                                                      \
+       struct trace_seq *s = &iter->seq;                               \
+       struct ftrace_raw_##template *field;                            \
+       struct trace_entry *entry;                                      \
+       struct trace_seq *p;                                            \
        int ret;                                                        \
                                                                        \
        entry = iter->ent;                                              \
@@ -277,14 +385,16 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)  \
                                                                        \
        p = &get_cpu_var(ftrace_event_seq);                             \
        trace_seq_init(p);                                              \
-       ret = trace_seq_printf(s, #call ": " print);                    \
+       ret = trace_seq_printf(s, "%s: ", #call);                       \
+       if (ret)                                                        \
+               ret = trace_seq_printf(s, print);                       \
        put_cpu();                                                      \
        if (!ret)                                                       \
                return TRACE_TYPE_PARTIAL_LINE;                         \
                                                                        \
        return TRACE_TYPE_HANDLED;                                      \
 }
-       
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #undef __field_ext
@@ -318,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)    \
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)           \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)   \
 static int                                                             \
 ftrace_define_fields_##call(struct ftrace_event_call *event_call)      \
 {                                                                      \
@@ -335,6 +445,13 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)  \
        return ret;                                                     \
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -361,10 +478,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
        __data_size += (len) * sizeof(type);
 
 #undef __string
-#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
 static inline int ftrace_get_offsets_##call(                           \
        struct ftrace_data_offsets_##call *__data_offsets, proto)       \
 {                                                                      \
@@ -376,6 +493,13 @@ static inline int ftrace_get_offsets_##call(                               \
        return __data_size;                                             \
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #ifdef CONFIG_EVENT_PROFILE
@@ -397,21 +521,28 @@ static inline int ftrace_get_offsets_##call(                              \
  *
  */
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)                      \
                                                                        \
-static void ftrace_profile_##call(proto);                              \
+static void ftrace_profile_##name(proto);                              \
                                                                        \
-static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\
+static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\
 {                                                                      \
-       return register_trace_##call(ftrace_profile_##call);            \
+       return register_trace_##name(ftrace_profile_##name);            \
 }                                                                      \
                                                                        \
-static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
+static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 {                                                                      \
-       unregister_trace_##call(ftrace_profile_##call);                 \
+       unregister_trace_##name(ftrace_profile_##name);                 \
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #endif
@@ -550,15 +681,13 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
 #define __assign_str(dst, src)                                         \
        strcpy(__get_str(dst), src);
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
                                                                        \
-static struct ftrace_event_call event_##call;                          \
-                                                                       \
-static void ftrace_raw_event_##call(proto)                             \
+static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
+                                      proto)                           \
 {                                                                      \
        struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-       struct ftrace_event_call *event_call = &event_##call;           \
        struct ring_buffer_event *event;                                \
        struct ftrace_raw_##call *entry;                                \
        struct ring_buffer *buffer;                                     \
@@ -572,7 +701,7 @@ static void ftrace_raw_event_##call(proto)                          \
        __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
                                                                        \
        event = trace_current_buffer_lock_reserve(&buffer,              \
-                                event_##call.id,                       \
+                                event_call->id,                        \
                                 sizeof(*entry) + __data_size,          \
                                 irq_flags, pc);                        \
        if (!event)                                                     \
@@ -587,6 +716,14 @@ static void ftrace_raw_event_##call(proto)                         \
        if (!filter_current_check_discard(buffer, event_call, entry, event)) \
                trace_nowake_buffer_unlock_commit(buffer,               \
                                                  event, irq_flags, pc); \
+}
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)                      \
+                                                                       \
+static void ftrace_raw_event_##call(proto)                             \
+{                                                                      \
+       ftrace_raw_event_id_##template(&event_##call, args);            \
 }                                                                      \
                                                                        \
 static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
@@ -619,7 +756,36 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
        event_##call.id = id;                                           \
        INIT_LIST_HEAD(&event_##call.fields);                           \
        return 0;                                                       \
-}                                                                      \
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)                      \
+                                                                       \
+static struct ftrace_event_call __used                                 \
+__attribute__((__aligned__(4)))                                                \
+__attribute__((section("_ftrace_events"))) event_##call = {            \
+       .name                   = #call,                                \
+       .system                 = __stringify(TRACE_SYSTEM),            \
+       .event                  = &ftrace_event_type_##call,            \
+       .raw_init               = ftrace_raw_init_event_##call,         \
+       .regfunc                = ftrace_raw_reg_event_##call,          \
+       .unregfunc              = ftrace_raw_unreg_event_##call,        \
+       .show_format            = ftrace_format_##template,             \
+       .define_fields          = ftrace_define_fields_##template,      \
+       _TRACE_PROFILE_INIT(call)                                       \
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, call, proto, args, print)         \
                                                                        \
 static struct ftrace_event_call __used                                 \
 __attribute__((__aligned__(4)))                                                \
@@ -631,7 +797,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {         \
        .regfunc                = ftrace_raw_reg_event_##call,          \
        .unregfunc              = ftrace_raw_unreg_event_##call,        \
        .show_format            = ftrace_format_##call,                 \
-       .define_fields          = ftrace_define_fields_##call,          \
+       .define_fields          = ftrace_define_fields_##template,      \
        _TRACE_PROFILE_INIT(call)                                       \
 }
 
@@ -719,22 +885,26 @@ __attribute__((section("_ftrace_events"))) event_##call = {               \
 #undef __perf_count
 #define __perf_count(c) __count = (c)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
-static void ftrace_profile_##call(proto)                               \
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+static void                                                            \
+ftrace_profile_templ_##call(struct ftrace_event_call *event_call,      \
+                           proto)                                      \
 {                                                                      \
        struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-       struct ftrace_event_call *event_call = &event_##call;           \
+       extern int perf_swevent_get_recursion_context(void);            \
+       extern void perf_swevent_put_recursion_context(int rctx);       \
        extern void perf_tp_event(int, u64, u64, void *, int);          \
        struct ftrace_raw_##call *entry;                                \
-       struct perf_trace_buf *trace_buf;                               \
        u64 __addr = 0, __count = 1;                                    \
        unsigned long irq_flags;                                        \
        struct trace_entry *ent;                                        \
        int __entry_size;                                               \
        int __data_size;                                                \
+       char *trace_buf;                                                \
        char *raw_data;                                                 \
        int __cpu;                                                      \
+       int rctx;                                                       \
        int pc;                                                         \
                                                                        \
        pc = preempt_count();                                           \
@@ -749,6 +919,11 @@ static void ftrace_profile_##call(proto)                           \
                return;                                                 \
                                                                        \
        local_irq_save(irq_flags);                                      \
+                                                                       \
+       rctx = perf_swevent_get_recursion_context();                    \
+       if (rctx < 0)                                                   \
+               goto end_recursion;                                     \
+                                                                       \
        __cpu = smp_processor_id();                                     \
                                                                        \
        if (in_nmi())                                                   \
@@ -759,13 +934,7 @@ static void ftrace_profile_##call(proto)                           \
        if (!trace_buf)                                                 \
                goto end;                                               \
                                                                        \
-       trace_buf = per_cpu_ptr(trace_buf, __cpu);                      \
-       if (trace_buf->recursion++)                                     \
-               goto end_recursion;                                     \
-                                                                       \
-       barrier();                                                      \
-                                                                       \
-       raw_data = trace_buf->buf;                                      \
+       raw_data = per_cpu_ptr(trace_buf, __cpu);                       \
                                                                        \
        *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;         \
        entry = (struct ftrace_raw_##call *)raw_data;                   \
@@ -780,13 +949,26 @@ static void ftrace_profile_##call(proto)                          \
        perf_tp_event(event_call->id, __addr, __count, entry,           \
                             __entry_size);                             \
                                                                        \
-end_recursion:                                                         \
-       trace_buf->recursion--;                                         \
 end:                                                                   \
+       perf_swevent_put_recursion_context(rctx);                       \
+end_recursion:                                                         \
        local_irq_restore(irq_flags);                                   \
                                                                        \
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)              \
+static void ftrace_profile_##call(proto)                       \
+{                                                              \
+       struct ftrace_event_call *event_call = &event_##call;   \
+                                                               \
+       ftrace_profile_templ_##template(event_call, args);      \
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 #endif /* CONFIG_EVENT_PROFILE */
 
index 51ee17d3632a4800dd06fa879a7d6bf92605ea71..961fda3556bb828f62508dc3acb440c90f21e32d 100644 (file)
  * A syscall entry in the ftrace syscalls array.
  *
  * @name: name of the syscall
+ * @syscall_nr: number of the syscall
  * @nb_args: number of parameters it takes
  * @types: list of types as strings
  * @args: list of args as strings (args[i] matches types[i])
- * @enter_id: associated ftrace enter event id
- * @exit_id: associated ftrace exit event id
  * @enter_event: associated syscall_enter trace event
  * @exit_event: associated syscall_exit trace event
  */
 struct syscall_metadata {
        const char      *name;
+       int             syscall_nr;
        int             nb_args;
        const char      **types;
        const char      **args;
-       int             enter_id;
-       int             exit_id;
 
        struct ftrace_event_call *enter_event;
        struct ftrace_event_call *exit_event;
@@ -34,11 +32,7 @@ struct syscall_metadata {
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
-extern int syscall_name_to_nr(char *name);
-void set_syscall_enter_id(int num, int id);
-void set_syscall_exit_id(int num, int id);
-extern struct trace_event event_syscall_enter;
-extern struct trace_event event_syscall_exit;
+extern int init_syscall_trace(struct ftrace_event_call *call);
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
                                struct trace_seq *s);
@@ -56,10 +50,10 @@ enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
 enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
 #ifdef CONFIG_EVENT_PROFILE
-int reg_prof_syscall_enter(char *name);
-void unreg_prof_syscall_enter(char *name);
-int reg_prof_syscall_exit(char *name);
-void unreg_prof_syscall_exit(char *name);
+int prof_sysenter_enable(struct ftrace_event_call *call);
+void prof_sysenter_disable(struct ftrace_event_call *call);
+int prof_sysexit_enable(struct ftrace_event_call *call);
+void prof_sysexit_disable(struct ftrace_event_call *call);
 
 #endif
 
index b8d4cd8ac0b9d5d93e303833e70ed55d450582ee..6b7ce8173dfdc063676dd6d2a565fbcc56cc7603 100644 (file)
@@ -21,6 +21,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -95,6 +96,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
index f7864ac2ecc1ad54c0af6b06b6f9d2da4a93f1ac..3f45e3cf931d917fc1dca145be32618afda895cd 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -977,6 +978,10 @@ NORET_TYPE void do_exit(long code)
 
        proc_exit_connector(tsk);
 
+       /*
+        * FIXME: do that only when needed, using sched_exit tracepoint
+        */
+       flush_ptrace_hw_breakpoint(tsk);
        /*
         * Flush inherited counters to the parent - before the parent
         * gets woken up by child-exit notifications.
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644 (file)
index 0000000..cf5ee16
--- /dev/null
@@ -0,0 +1,423 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) IBM Corporation, 2009
+ * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Thanks to Ingo Molnar for his many suggestions.
+ *
+ * Authors: Alan Stern <stern@rowland.harvard.edu>
+ *          K.Prasad <prasad@linux.vnet.ibm.com>
+ *          Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ * This file contains the arch-independent routines.
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <linux/hw_breakpoint.h>
+
+/*
+ * Constraints data
+ */
+
+/* Number of pinned cpu breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+
+/* Number of pinned task breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+
+/* Number of non-pinned cpu/task breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+
+/* Gather the number of total pinned and un-pinned bp in a cpuset */
+struct bp_busy_slots {
+       unsigned int pinned;
+       unsigned int flexible;
+};
+
+/* Serialize accesses to the above constraints */
+static DEFINE_MUTEX(nr_bp_mutex);
+
+/*
+ * Report the maximum number of pinned breakpoints a task
+ * has in this cpu
+ */
+static unsigned int max_task_bp_pinned(int cpu)
+{
+       int i;
+       unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+
+       for (i = HBP_NUM -1; i >= 0; i--) {
+               if (tsk_pinned[i] > 0)
+                       return i + 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Report the number of pinned/un-pinned breakpoints we have in
+ * a given cpu (cpu > -1) or in all of them (cpu = -1).
+ */
+static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+{
+       if (cpu >= 0) {
+               slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+               slots->pinned += max_task_bp_pinned(cpu);
+               slots->flexible = per_cpu(nr_bp_flexible, cpu);
+
+               return;
+       }
+
+       for_each_online_cpu(cpu) {
+               unsigned int nr;
+
+               nr = per_cpu(nr_cpu_bp_pinned, cpu);
+               nr += max_task_bp_pinned(cpu);
+
+               if (nr > slots->pinned)
+                       slots->pinned = nr;
+
+               nr = per_cpu(nr_bp_flexible, cpu);
+
+               if (nr > slots->flexible)
+                       slots->flexible = nr;
+       }
+}
+
+/*
+ * Add or remove a pinned breakpoint for the given task in our constraint table
+ */
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+{
+       int count = 0;
+       struct perf_event *bp;
+       struct perf_event_context *ctx = tsk->perf_event_ctxp;
+       unsigned int *tsk_pinned;
+       struct list_head *list;
+       unsigned long flags;
+
+       if (WARN_ONCE(!ctx, "No perf context for this task"))
+               return;
+
+       list = &ctx->event_list;
+
+       spin_lock_irqsave(&ctx->lock, flags);
+
+       /*
+        * The current breakpoint counter is not included in the list
+        * at the open() callback time
+        */
+       list_for_each_entry(bp, list, event_entry) {
+               if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+                       count++;
+       }
+
+       spin_unlock_irqrestore(&ctx->lock, flags);
+
+       if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
+               return;
+
+       tsk_pinned = per_cpu(task_bp_pinned, cpu);
+       if (enable) {
+               tsk_pinned[count]++;
+               if (count > 0)
+                       tsk_pinned[count-1]--;
+       } else {
+               tsk_pinned[count]--;
+               if (count > 0)
+                       tsk_pinned[count-1]++;
+       }
+}
+
+/*
+ * Add/remove the given breakpoint in our constraint table
+ */
+static void toggle_bp_slot(struct perf_event *bp, bool enable)
+{
+       int cpu = bp->cpu;
+       struct task_struct *tsk = bp->ctx->task;
+
+       /* Pinned counter task profiling */
+       if (tsk) {
+               if (cpu >= 0) {
+                       toggle_bp_task_slot(tsk, cpu, enable);
+                       return;
+               }
+
+               for_each_online_cpu(cpu)
+                       toggle_bp_task_slot(tsk, cpu, enable);
+               return;
+       }
+
+       /* Pinned counter cpu profiling */
+       if (enable)
+               per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+       else
+               per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+}
+
+/*
+ * Constraints to check before allowing this new breakpoint counter:
+ *
+ *  == Non-pinned counter == (Considered as pinned for now)
+ *
+ *   - If attached to a single cpu, check:
+ *
+ *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
+ *           + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ *
+ *       -> If there are already non-pinned counters in this cpu, it means
+ *          there is already a free slot for them.
+ *          Otherwise, we check that the maximum number of per-task
+ *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
+ *          (for this cpu) doesn't cover every register.
+ *
+ *   - If attached to every cpu, check:
+ *
+ *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
+ *           + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ *
+ *       -> This is roughly the same, except we check the number of per-cpu
+ *          bp for every cpu and we keep the max one. Same for the per-task
+ *          breakpoints.
+ *
+ *
+ * == Pinned counter ==
+ *
+ *   - If attached to a single cpu, check:
+ *
+ *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
+ *            + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ *
+ *       -> Same checks as before. But now the nr_bp_flexible, if any, must keep
+ *          one register at least (or they will never be fed).
+ *
+ *   - If attached to every cpu, check:
+ *
+ *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
+ *            + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ */
+int reserve_bp_slot(struct perf_event *bp)
+{
+       struct bp_busy_slots slots = {0};
+       int ret = 0;
+
+       mutex_lock(&nr_bp_mutex);
+
+       fetch_bp_busy_slots(&slots, bp->cpu);
+
+       /* Flexible counters need to keep at least one slot */
+       if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
+               ret = -ENOSPC;
+               goto end;
+       }
+
+       toggle_bp_slot(bp, true);
+
+end:
+       mutex_unlock(&nr_bp_mutex);
+
+       return ret;
+}
+
+void release_bp_slot(struct perf_event *bp)
+{
+       mutex_lock(&nr_bp_mutex);
+
+       toggle_bp_slot(bp, false);
+
+       mutex_unlock(&nr_bp_mutex);
+}
+
+
+/*
+ * Reserve a breakpoint slot for @bp and, unless @bp is a disabled
+ * placeholder event, validate its settings.
+ *
+ * Returns 0 on success, or the error reported by reserve_bp_slot() or
+ * arch_validate_hwbkpt_settings(). No slot stays reserved on failure.
+ */
+int __register_perf_hw_breakpoint(struct perf_event *bp)
+{
+       int ret;
+
+       ret = reserve_bp_slot(bp);
+       if (ret)
+               return ret;
+
+       /*
+        * Ptrace breakpoints can be temporary perf events only
+        * meant to reserve a slot. In this case, it is created disabled and
+        * we don't want to check the params right now (as we put a null addr)
+        * But perf tools create events as disabled and we want to check
+        * the params for them.
+        * This is a quick hack that will be removed soon, once we remove
+        * the tmp breakpoints from ptrace
+        */
+       if (!bp->attr.disabled || bp->callback == perf_bp_event)
+               ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+
+       /*
+        * bp_perf_event_init() installs the destroy hook that releases the
+        * slot only after a successful registration, so a slot left
+        * reserved on a validation failure would never be given back.
+        */
+       if (ret)
+               release_bp_slot(bp);
+
+       return ret;
+}
+
+/*
+ * Register @bp with perf_bp_event as its callback. Because the callback
+ * is set before the call, __register_perf_hw_breakpoint() validates the
+ * settings even when the event was created disabled.
+ */
+int register_perf_hw_breakpoint(struct perf_event *bp)
+{
+       bp->callback = perf_bp_event;
+
+       return __register_perf_hw_breakpoint(bp);
+}
+
+/**
+ * register_user_hw_breakpoint - register a hardware breakpoint for user space
+ * @attr: breakpoint attributes
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ *
+ * The event is created by perf_event_create_kernel_counter() with cpu -1
+ * and the pid of @tsk; its return value is passed back unchanged.
+ */
+struct perf_event *
+register_user_hw_breakpoint(struct perf_event_attr *attr,
+                           perf_callback_t triggered,
+                           struct task_struct *tsk)
+{
+       return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+}
+EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
+
+/**
+ * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
+ * @bp: the breakpoint structure to modify
+ * @attr: new breakpoint attributes
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ *
+ * The modification is done by unregistering @bp and creating a new event
+ * from @attr, so @bp is released even if creating the replacement fails.
+ * The return value of perf_event_create_kernel_counter() is passed back
+ * unchanged.
+ */
+struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
+                         perf_callback_t triggered,
+                         struct task_struct *tsk)
+{
+       /*
+        * FIXME: do it without unregistering
+        * - We don't want to lose our slot
+        * - If the new bp is incorrect, don't lose the older one
+        */
+       unregister_hw_breakpoint(bp);
+
+       return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+}
+EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
+
+/**
+ * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
+ * @bp: the breakpoint structure to unregister; NULL is accepted and ignored
+ */
+void unregister_hw_breakpoint(struct perf_event *bp)
+{
+       if (bp)
+               perf_event_release_kernel(bp);
+}
+EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
+
+/**
+ * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
+ * @attr: breakpoint attributes
+ * @triggered: callback to trigger when we hit the breakpoint
+ *
+ * One perf event is created for each possible cpu.
+ *
+ * @return a set of per_cpu pointers to perf events, or an ERR_PTR() value
+ * when the per-cpu array or one of the events could not be created
+ */
+struct perf_event **
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+                           perf_callback_t triggered)
+{
+       struct perf_event **cpu_events, **slot;
+       long err = 0;
+       int cpu;
+
+       cpu_events = alloc_percpu(typeof(*cpu_events));
+       if (!cpu_events)
+               return ERR_PTR(-ENOMEM);
+
+       for_each_possible_cpu(cpu) {
+               slot = per_cpu_ptr(cpu_events, cpu);
+               *slot = perf_event_create_kernel_counter(attr, cpu, -1,
+                                                        triggered);
+               if (IS_ERR(*slot)) {
+                       err = PTR_ERR(*slot);
+                       break;
+               }
+       }
+
+       if (!err)
+               return cpu_events;
+
+       /*
+        * Undo: walk the cpus in the same order and unregister every event
+        * found before the slot that holds the error pointer.
+        */
+       for_each_possible_cpu(cpu) {
+               slot = per_cpu_ptr(cpu_events, cpu);
+               if (IS_ERR(*slot))
+                       break;
+               unregister_hw_breakpoint(*slot);
+       }
+       free_percpu(cpu_events);
+
+       /* return the error if any */
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
+
+/**
+ * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
+ * @cpu_events: the per cpu set of events to unregister
+ *
+ * Unregisters the event of every possible cpu, then frees the per-cpu
+ * array itself.
+ */
+void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               unregister_hw_breakpoint(*per_cpu_ptr(cpu_events, cpu));
+
+       free_percpu(cpu_events);
+}
+EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
+
+/*
+ * Notifier block handed to register_die_notifier() by init_hw_breakpoint();
+ * it routes notifications to hw_breakpoint_exceptions_notify().
+ */
+static struct notifier_block hw_breakpoint_exceptions_nb = {
+       .notifier_call = hw_breakpoint_exceptions_notify,
+       /* we need to be notified first */
+       .priority = 0x7fffffff
+};
+
+/*
+ * core_initcall: register hw_breakpoint_exceptions_nb as a die notifier
+ * and return the result of that registration.
+ */
+static int __init init_hw_breakpoint(void)
+{
+       return register_die_notifier(&hw_breakpoint_exceptions_nb);
+}
+core_initcall(init_hw_breakpoint);
+
+
+/*
+ * pmu operations for breakpoint events: enable/disable go to the arch
+ * install/uninstall hooks, read/unthrottle to the hw_breakpoint_pmu_*
+ * helpers.
+ */
+struct pmu perf_ops_bp = {
+       .enable         = arch_install_hw_breakpoint,
+       .disable        = arch_uninstall_hw_breakpoint,
+       .read           = hw_breakpoint_pmu_read,
+       .unthrottle     = hw_breakpoint_pmu_unthrottle
+};
index 8b6b8b697c686a297de3c85421e1e5172ff53a27..8e5288a8a3555c419f477e0925f3210e3863cef6 100644 (file)
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
        }
        return module_kallsyms_lookup_name(name);
 }
+EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
 
 int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
                                      unsigned long),
index 3256e36ad251f1dc745469d8c572fc6e497c1772..6b7ddba1dd640cc94f2af66fd163961e944a94f2 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -245,6 +246,49 @@ static void perf_unpin_context(struct perf_event_context *ctx)
        put_ctx(ctx);
 }
 
+/*
+ * Clock used for the context time accounting: cpu_clock() of the cpu
+ * this code is currently running on.
+ */
+static inline u64 perf_clock(void)
+{
+       return cpu_clock(smp_processor_id());
+}
+
+/*
+ * Advance ctx->time by the time elapsed since ctx->timestamp, and move
+ * ctx->timestamp to the current perf_clock() value.
+ */
+static void update_context_time(struct perf_event_context *ctx)
+{
+       const u64 stamp = perf_clock();
+       const u64 elapsed = stamp - ctx->timestamp;
+
+       ctx->timestamp = stamp;
+       ctx->time += elapsed;
+}
+
+/*
+ * Refresh the total_time_enabled and total_time_running fields of an event.
+ * Nothing is updated when the event or its group leader is in a state
+ * below PERF_EVENT_STATE_INACTIVE.
+ */
+static void update_event_times(struct perf_event *event)
+{
+       struct perf_event_context *ctx = event->ctx;
+       u64 enabled_end, running_end;
+
+       if (event->state < PERF_EVENT_STATE_INACTIVE ||
+           event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
+               return;
+
+       /* Enabled time follows the context time only while ctx is active */
+       enabled_end = ctx->is_active ? ctx->time : event->tstamp_stopped;
+
+       /* Running time of an inactive event ends at its stop timestamp */
+       running_end = (event->state == PERF_EVENT_STATE_INACTIVE) ?
+                       event->tstamp_stopped : ctx->time;
+
+       event->total_time_enabled = enabled_end - event->tstamp_enabled;
+       event->total_time_running = running_end - event->tstamp_running;
+}
+
 /*
  * Add a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -293,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
        if (event->group_leader != event)
                event->group_leader->nr_siblings--;
 
+       update_event_times(event);
+
+       /*
+        * If event was in error state, then keep it
+        * that way, otherwise bogus counts will be
+        * returned on read(). The only way to get out
+        * of error state is by explicit re-enabling
+        * of the event
+        */
+       if (event->state > PERF_EVENT_STATE_OFF)
+               event->state = PERF_EVENT_STATE_OFF;
+
        /*
         * If this was a group event with sibling events then
         * upgrade the siblings to singleton events by adding them
@@ -446,50 +502,11 @@ retry:
         * can remove the event safely, if the call above did not
         * succeed.
         */
-       if (!list_empty(&event->group_entry)) {
+       if (!list_empty(&event->group_entry))
                list_del_event(event, ctx);
-       }
        spin_unlock_irq(&ctx->lock);
 }
 
-static inline u64 perf_clock(void)
-{
-       return cpu_clock(smp_processor_id());
-}
-
-/*
- * Update the record of the current time in a context.
- */
-static void update_context_time(struct perf_event_context *ctx)
-{
-       u64 now = perf_clock();
-
-       ctx->time += now - ctx->timestamp;
-       ctx->timestamp = now;
-}
-
-/*
- * Update the total_time_enabled and total_time_running fields for a event.
- */
-static void update_event_times(struct perf_event *event)
-{
-       struct perf_event_context *ctx = event->ctx;
-       u64 run_end;
-
-       if (event->state < PERF_EVENT_STATE_INACTIVE ||
-           event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
-               return;
-
-       event->total_time_enabled = ctx->time - event->tstamp_enabled;
-
-       if (event->state == PERF_EVENT_STATE_INACTIVE)
-               run_end = event->tstamp_stopped;
-       else
-               run_end = ctx->time;
-
-       event->total_time_running = run_end - event->tstamp_running;
-}
-
 /*
  * Update total_time_enabled and total_time_running for all events in a group.
  */
@@ -1032,10 +1049,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
        update_context_time(ctx);
 
        perf_disable();
-       if (ctx->nr_active)
+       if (ctx->nr_active) {
                list_for_each_entry(event, &ctx->group_list, group_entry)
                        group_sched_out(event, cpuctx, ctx);
-
+       }
        perf_enable();
  out:
        spin_unlock(&ctx->lock);
@@ -1060,8 +1077,6 @@ static int context_equiv(struct perf_event_context *ctx1,
                && !ctx1->pin_count && !ctx2->pin_count;
 }
 
-static void __perf_event_read(void *event);
-
 static void __perf_event_sync_stat(struct perf_event *event,
                                     struct perf_event *next_event)
 {
@@ -1079,8 +1094,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
         */
        switch (event->state) {
        case PERF_EVENT_STATE_ACTIVE:
-               __perf_event_read(event);
-               break;
+               event->pmu->read(event);
+               /* fall-through */
 
        case PERF_EVENT_STATE_INACTIVE:
                update_event_times(event);
@@ -1119,6 +1134,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
        if (!ctx->nr_stat)
                return;
 
+       update_context_time(ctx);
+
        event = list_first_entry(&ctx->event_list,
                                   struct perf_event, event_entry);
 
@@ -1162,8 +1179,6 @@ void perf_event_task_sched_out(struct task_struct *task,
        if (likely(!ctx || !cpuctx->task_ctx))
                return;
 
-       update_context_time(ctx);
-
        rcu_read_lock();
        parent = rcu_dereference(ctx->parent_ctx);
        next_ctx = next->perf_event_ctxp;
@@ -1516,7 +1531,6 @@ static void __perf_event_read(void *info)
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        struct perf_event *event = info;
        struct perf_event_context *ctx = event->ctx;
-       unsigned long flags;
 
        /*
         * If this is a task context, we need to check whether it is
@@ -1528,12 +1542,12 @@ static void __perf_event_read(void *info)
        if (ctx->task && cpuctx->task_ctx != ctx)
                return;
 
-       local_irq_save(flags);
-       if (ctx->is_active)
-               update_context_time(ctx);
-       event->pmu->read(event);
+       spin_lock(&ctx->lock);
+       update_context_time(ctx);
        update_event_times(event);
-       local_irq_restore(flags);
+       spin_unlock(&ctx->lock);
+
+       event->pmu->read(event);
 }
 
 static u64 perf_event_read(struct perf_event *event)
@@ -1546,7 +1560,13 @@ static u64 perf_event_read(struct perf_event *event)
                smp_call_function_single(event->oncpu,
                                         __perf_event_read, event, 1);
        } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+               struct perf_event_context *ctx = event->ctx;
+               unsigned long flags;
+
+               spin_lock_irqsave(&ctx->lock, flags);
+               update_context_time(ctx);
                update_event_times(event);
+               spin_unlock_irqrestore(&ctx->lock, flags);
        }
 
        return atomic64_read(&event->count);
@@ -1700,16 +1720,10 @@ static void free_event(struct perf_event *event)
        call_rcu(&event->rcu_head, free_event_rcu);
 }
 
-/*
- * Called when the last reference to the file is gone.
- */
-static int perf_release(struct inode *inode, struct file *file)
+int perf_event_release_kernel(struct perf_event *event)
 {
-       struct perf_event *event = file->private_data;
        struct perf_event_context *ctx = event->ctx;
 
-       file->private_data = NULL;
-
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);
        perf_event_remove_from_context(event);
@@ -1724,6 +1738,19 @@ static int perf_release(struct inode *inode, struct file *file)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
+/*
+ * Called when the last reference to the file is gone.
+ *
+ * Detaches the event from the file (private_data is cleared) and passes
+ * it to perf_event_release_kernel(), whose result is returned.
+ */
+static int perf_release(struct inode *inode, struct file *file)
+{
+       struct perf_event *event = file->private_data;
+
+       file->private_data = NULL;
+
+       return perf_event_release_kernel(event);
+}
 
 static int perf_event_read_size(struct perf_event *event)
 {
@@ -1750,91 +1777,94 @@ static int perf_event_read_size(struct perf_event *event)
        return size;
 }
 
-static u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
        u64 total = 0;
 
+       *enabled = 0;
+       *running = 0;
+
+       mutex_lock(&event->child_mutex);
        total += perf_event_read(event);
-       list_for_each_entry(child, &event->child_list, child_list)
+       *enabled += event->total_time_enabled +
+                       atomic64_read(&event->child_total_time_enabled);
+       *running += event->total_time_running +
+                       atomic64_read(&event->child_total_time_running);
+
+       list_for_each_entry(child, &event->child_list, child_list) {
                total += perf_event_read(child);
+               *enabled += child->total_time_enabled;
+               *running += child->total_time_running;
+       }
+       mutex_unlock(&event->child_mutex);
 
        return total;
 }
-
-static int perf_event_read_entry(struct perf_event *event,
-                                  u64 read_format, char __user *buf)
-{
-       int n = 0, count = 0;
-       u64 values[2];
-
-       values[n++] = perf_event_read_value(event);
-       if (read_format & PERF_FORMAT_ID)
-               values[n++] = primary_event_id(event);
-
-       count = n * sizeof(u64);
-
-       if (copy_to_user(buf, values, count))
-               return -EFAULT;
-
-       return count;
-}
+EXPORT_SYMBOL_GPL(perf_event_read_value);
 
 static int perf_event_read_group(struct perf_event *event,
                                   u64 read_format, char __user *buf)
 {
        struct perf_event *leader = event->group_leader, *sub;
-       int n = 0, size = 0, err = -EFAULT;
-       u64 values[3];
+       int n = 0, size = 0, ret = -EFAULT;
+       struct perf_event_context *ctx = leader->ctx;
+       u64 values[5];
+       u64 count, enabled, running;
+
+       mutex_lock(&ctx->mutex);
+       count = perf_event_read_value(leader, &enabled, &running);
 
        values[n++] = 1 + leader->nr_siblings;
-       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-               values[n++] = leader->total_time_enabled +
-                       atomic64_read(&leader->child_total_time_enabled);
-       }
-       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-               values[n++] = leader->total_time_running +
-                       atomic64_read(&leader->child_total_time_running);
-       }
+       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               values[n++] = enabled;
+       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               values[n++] = running;
+       values[n++] = count;
+       if (read_format & PERF_FORMAT_ID)
+               values[n++] = primary_event_id(leader);
 
        size = n * sizeof(u64);
 
        if (copy_to_user(buf, values, size))
-               return -EFAULT;
-
-       err = perf_event_read_entry(leader, read_format, buf + size);
-       if (err < 0)
-               return err;
+               goto unlock;
 
-       size += err;
+       ret = size;
 
        list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-               err = perf_event_read_entry(sub, read_format,
-                               buf + size);
-               if (err < 0)
-                       return err;
+               n = 0;
+
+               values[n++] = perf_event_read_value(sub, &enabled, &running);
+               if (read_format & PERF_FORMAT_ID)
+                       values[n++] = primary_event_id(sub);
+
+               size = n * sizeof(u64);
+
+               if (copy_to_user(buf + ret, values, size)) {
+                       ret = -EFAULT;
+                       goto unlock;
+               }
 
-               size += err;
+               ret += size;
        }
+unlock:
+       mutex_unlock(&ctx->mutex);
 
-       return size;
+       return ret;
 }
 
 static int perf_event_read_one(struct perf_event *event,
                                 u64 read_format, char __user *buf)
 {
+       u64 enabled, running;
        u64 values[4];
        int n = 0;
 
-       values[n++] = perf_event_read_value(event);
-       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-               values[n++] = event->total_time_enabled +
-                       atomic64_read(&event->child_total_time_enabled);
-       }
-       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-               values[n++] = event->total_time_running +
-                       atomic64_read(&event->child_total_time_running);
-       }
+       values[n++] = perf_event_read_value(event, &enabled, &running);
+       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               values[n++] = enabled;
+       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               values[n++] = running;
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(event);
 
@@ -1865,12 +1895,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
                return -ENOSPC;
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
-       mutex_lock(&event->child_mutex);
        if (read_format & PERF_FORMAT_GROUP)
                ret = perf_event_read_group(event, read_format, buf);
        else
                ret = perf_event_read_one(event, read_format, buf);
-       mutex_unlock(&event->child_mutex);
 
        return ret;
 }
@@ -2182,6 +2210,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
        perf_mmap_free_page((unsigned long)data->user_page);
        for (i = 0; i < data->nr_pages; i++)
                perf_mmap_free_page((unsigned long)data->data_pages[i]);
+       kfree(data);
 }
 
 #else
@@ -2222,6 +2251,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
                perf_mmap_unmark_page(base + (i * PAGE_SIZE));
 
        vfree(base);
+       kfree(data);
 }
 
 static void perf_mmap_data_free(struct perf_mmap_data *data)
@@ -2315,7 +2345,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
        }
 
        if (!data->watermark)
-               data->watermark = max_t(long, PAGE_SIZE, max_size / 2);
+               data->watermark = max_size / 2;
 
 
        rcu_assign_pointer(event->data, data);
@@ -2327,7 +2357,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
 
        data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
        perf_mmap_data_free(data);
-       kfree(data);
 }
 
 static void perf_mmap_data_release(struct perf_event *event)
@@ -3245,15 +3274,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
 {
        struct perf_event *event;
 
-       if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-               return;
-
-       rcu_read_lock();
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (perf_event_task_match(event))
                        perf_event_task_output(event, task_event);
        }
-       rcu_read_unlock();
 }
 
 static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3261,11 +3285,11 @@ static void perf_event_task_event(struct perf_task_event *task_event)
        struct perf_cpu_context *cpuctx;
        struct perf_event_context *ctx = task_event->task_ctx;
 
+       rcu_read_lock();
        cpuctx = &get_cpu_var(perf_cpu_context);
        perf_event_task_ctx(&cpuctx->ctx, task_event);
        put_cpu_var(perf_cpu_context);
 
-       rcu_read_lock();
        if (!ctx)
                ctx = rcu_dereference(task_event->task->perf_event_ctxp);
        if (ctx)
@@ -3357,15 +3381,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx,
 {
        struct perf_event *event;
 
-       if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-               return;
-
-       rcu_read_lock();
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (perf_event_comm_match(event))
                        perf_event_comm_output(event, comm_event);
        }
-       rcu_read_unlock();
 }
 
 static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3376,7 +3395,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
        char comm[TASK_COMM_LEN];
 
        memset(comm, 0, sizeof(comm));
-       strncpy(comm, comm_event->task->comm, sizeof(comm));
+       strlcpy(comm, comm_event->task->comm, sizeof(comm));
        size = ALIGN(strlen(comm)+1, sizeof(u64));
 
        comm_event->comm = comm;
@@ -3384,11 +3403,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 
        comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
 
+       rcu_read_lock();
        cpuctx = &get_cpu_var(perf_cpu_context);
        perf_event_comm_ctx(&cpuctx->ctx, comm_event);
        put_cpu_var(perf_cpu_context);
 
-       rcu_read_lock();
        /*
         * doesn't really matter which of the child contexts the
         * events ends up in.
@@ -3481,15 +3500,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx,
 {
        struct perf_event *event;
 
-       if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-               return;
-
-       rcu_read_lock();
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (perf_event_mmap_match(event, mmap_event))
                        perf_event_mmap_output(event, mmap_event);
        }
-       rcu_read_unlock();
 }
 
 static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3545,11 +3559,11 @@ got_name:
 
        mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
+       rcu_read_lock();
        cpuctx = &get_cpu_var(perf_cpu_context);
        perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
        put_cpu_var(perf_cpu_context);
 
-       rcu_read_lock();
        /*
         * doesn't really matter which of the child contexts the
         * events ends up in.
@@ -3688,7 +3702,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
                        perf_event_disable(event);
        }
 
-       perf_event_output(event, nmi, data, regs);
+       if (event->overflow_handler)
+               event->overflow_handler(event, nmi, data, regs);
+       else
+               perf_event_output(event, nmi, data, regs);
+
        return ret;
 }
 
@@ -3733,16 +3751,16 @@ again:
        return nr;
 }
 
-static void perf_swevent_overflow(struct perf_event *event,
+static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
                                    int nmi, struct perf_sample_data *data,
                                    struct pt_regs *regs)
 {
        struct hw_perf_event *hwc = &event->hw;
        int throttle = 0;
-       u64 overflow;
 
        data->period = event->hw.last_period;
-       overflow = perf_swevent_set_period(event);
+       if (!overflow)
+               overflow = perf_swevent_set_period(event);
 
        if (hwc->interrupts == MAX_INTERRUPTS)
                return;
@@ -3775,14 +3793,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 
        atomic64_add(nr, &event->count);
 
+       if (!regs)
+               return;
+
        if (!hwc->sample_period)
                return;
 
-       if (!regs)
+       if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
+               return perf_swevent_overflow(event, 1, nmi, data, regs);
+
+       if (atomic64_add_negative(nr, &hwc->period_left))
                return;
 
-       if (!atomic64_add_negative(nr, &hwc->period_left))
-               perf_swevent_overflow(event, nmi, data, regs);
+       perf_swevent_overflow(event, 0, nmi, data, regs);
 }
 
 static int perf_swevent_is_counting(struct perf_event *event)
@@ -3818,6 +3841,20 @@ static int perf_swevent_is_counting(struct perf_event *event)
 static int perf_tp_event_match(struct perf_event *event,
                                struct perf_sample_data *data);
 
+/*
+ * Tell whether @event must ignore a hit described by @regs.
+ *
+ * Returns 1 when the event excludes user mode and @regs is user mode, or
+ * excludes kernel mode and @regs is not user mode; 0 otherwise, including
+ * when @regs is NULL.
+ */
+static int perf_exclude_event(struct perf_event *event,
+                             struct pt_regs *regs)
+{
+       if (!regs)
+               return 0;
+
+       if (event->attr.exclude_user && user_mode(regs))
+               return 1;
+
+       if (event->attr.exclude_kernel && !user_mode(regs))
+               return 1;
+
+       return 0;
+}
+
 static int perf_swevent_match(struct perf_event *event,
                                enum perf_type_id type,
                                u32 event_id,
@@ -3829,16 +3866,12 @@ static int perf_swevent_match(struct perf_event *event,
 
        if (event->attr.type != type)
                return 0;
+
        if (event->attr.config != event_id)
                return 0;
 
-       if (regs) {
-               if (event->attr.exclude_user && user_mode(regs))
-                       return 0;
-
-               if (event->attr.exclude_kernel && !user_mode(regs))
-                       return 0;
-       }
+       if (perf_exclude_event(event, regs))
+               return 0;
 
        if (event->attr.type == PERF_TYPE_TRACEPOINT &&
            !perf_tp_event_match(event, data))
@@ -3855,49 +3888,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
 {
        struct perf_event *event;
 
-       if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-               return;
-
-       rcu_read_lock();
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (perf_swevent_match(event, type, event_id, data, regs))
                        perf_swevent_add(event, nr, nmi, data, regs);
        }
-       rcu_read_unlock();
 }
 
-static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx)
+int perf_swevent_get_recursion_context(void)
 {
+       struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+       int rctx;
+
        if (in_nmi())
-               return &cpuctx->recursion[3];
+               rctx = 3;
+       else if (in_irq())
+               rctx = 2;
+       else if (in_softirq())
+               rctx = 1;
+       else
+               rctx = 0;
 
-       if (in_irq())
-               return &cpuctx->recursion[2];
+       if (cpuctx->recursion[rctx]) {
+               put_cpu_var(perf_cpu_context);
+               return -1;
+       }
 
-       if (in_softirq())
-               return &cpuctx->recursion[1];
+       cpuctx->recursion[rctx]++;
+       barrier();
 
-       return &cpuctx->recursion[0];
+       return rctx;
 }
+EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
+
+/*
+ * Drop the recursion marker taken by perf_swevent_get_recursion_context().
+ * @rctx: index returned by perf_swevent_get_recursion_context()
+ *
+ * The put_cpu_var() here pairs with the get_cpu_var() that
+ * perf_swevent_get_recursion_context() leaves held on its success path.
+ */
+void perf_swevent_put_recursion_context(int rctx)
+{
+       struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+       /* compiler barrier before the counter is dropped */
+       barrier();
+       cpuctx->recursion[rctx]--;
+       put_cpu_var(perf_cpu_context);
+}
+EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
 static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
                                    u64 nr, int nmi,
                                    struct perf_sample_data *data,
                                    struct pt_regs *regs)
 {
-       struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
-       int *recursion = perf_swevent_recursion_context(cpuctx);
+       struct perf_cpu_context *cpuctx;
        struct perf_event_context *ctx;
 
-       if (*recursion)
-               goto out;
-
-       (*recursion)++;
-       barrier();
-
+       cpuctx = &__get_cpu_var(perf_cpu_context);
+       rcu_read_lock();
        perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
                                 nr, nmi, data, regs);
-       rcu_read_lock();
        /*
         * doesn't really matter which of the child contexts the
         * events ends up in.
@@ -3906,23 +3949,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
        if (ctx)
                perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
        rcu_read_unlock();
-
-       barrier();
-       (*recursion)--;
-
-out:
-       put_cpu_var(perf_cpu_context);
 }
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
                            struct pt_regs *regs, u64 addr)
 {
-       struct perf_sample_data data = {
-               .addr = addr,
-       };
+       struct perf_sample_data data;
+       int rctx;
 
-       do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi,
-                               &data, regs);
+       rctx = perf_swevent_get_recursion_context();
+       if (rctx < 0)
+               return;
+
+       data.addr = addr;
+       data.raw  = NULL;
+
+       do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
+
+       perf_swevent_put_recursion_context(rctx);
 }
 
 static void perf_swevent_read(struct perf_event *event)
@@ -3967,6 +4011,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
        event->pmu->read(event);
 
        data.addr = 0;
+       data.period = event->hw.last_period;
        regs = get_irq_regs();
        /*
         * In case we exclude kernel IPs or are somehow not in interrupt
@@ -4145,6 +4190,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
        if (!regs)
                regs = task_pt_regs(current);
 
+       /* Trace events already protected against recursion */
        do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
                                &data, regs);
 }
@@ -4231,6 +4277,53 @@ static void perf_event_free_filter(struct perf_event *event)
 
 #endif /* CONFIG_EVENT_PROFILE */
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+/*
+ * destroy hook installed by bp_perf_event_init(): releases the breakpoint
+ * slot held by the event.
+ */
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+       release_bp_slot(event);
+}
+
+/*
+ * Set up @bp as a hardware breakpoint event.
+ *
+ * Returns &perf_ops_bp on success, or ERR_PTR() of the registration error.
+ */
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+       int err;
+
+       /*
+        * The breakpoint is already filled if we haven't created the counter
+        * through perf syscall
+        * FIXME: manage to get triggered to NULL if it comes from syscalls
+        */
+       err = bp->callback ? __register_perf_hw_breakpoint(bp) :
+                            register_perf_hw_breakpoint(bp);
+       if (err)
+               return ERR_PTR(err);
+
+       /* The slot-releasing hook is only set once registration succeeded */
+       bp->destroy = bp_perf_event_destroy;
+
+       return &perf_ops_bp;
+}
+
+/*
+ * Callback that register_perf_hw_breakpoint() installs on breakpoint
+ * events.
+ * @bp: the breakpoint event that was hit
+ * @data: the struct pt_regs of the hit, passed as an opaque pointer
+ *
+ * Accounts one hit on @bp, with the breakpoint address as sample address,
+ * unless the event excludes the mode described by the registers.
+ */
+void perf_bp_event(struct perf_event *bp, void *data)
+{
+       struct perf_sample_data sample;
+       struct pt_regs *regs = data;
+
+       sample.addr = bp->attr.bp_addr;
+       /*
+        * There is no raw payload for a breakpoint hit; set it explicitly,
+        * as __perf_sw_event() does, so the sample never carries an
+        * uninitialized stack pointer.
+        */
+       sample.raw = NULL;
+
+       if (!perf_exclude_event(bp, regs))
+               perf_swevent_add(bp, 1, 1, &sample, regs);
+}
+#else
+/* Without CONFIG_HAVE_HW_BREAKPOINT no pmu is provided for breakpoint events */
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+       return NULL;
+}
+
+/* Without CONFIG_HAVE_HW_BREAKPOINT this is an empty stub */
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+}
+#endif
+
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
@@ -4297,6 +4390,7 @@ perf_event_alloc(struct perf_event_attr *attr,
                   struct perf_event_context *ctx,
                   struct perf_event *group_leader,
                   struct perf_event *parent_event,
+                  perf_callback_t callback,
                   gfp_t gfpflags)
 {
        const struct pmu *pmu;
@@ -4339,6 +4433,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 
        event->state            = PERF_EVENT_STATE_INACTIVE;
 
+       if (!callback && parent_event)
+               callback = parent_event->callback;
+       
+       event->callback = callback;
+
        if (attr->disabled)
                event->state = PERF_EVENT_STATE_OFF;
 
@@ -4373,6 +4472,11 @@ perf_event_alloc(struct perf_event_attr *attr,
                pmu = tp_perf_event_init(event);
                break;
 
+       case PERF_TYPE_BREAKPOINT:
+               pmu = bp_perf_event_init(event);
+               break;
+
+
        default:
                break;
        }
@@ -4615,7 +4719,7 @@ SYSCALL_DEFINE5(perf_event_open,
        }
 
        event = perf_event_alloc(&attr, cpu, ctx, group_leader,
-                                    NULL, GFP_KERNEL);
+                                    NULL, NULL, GFP_KERNEL);
        err = PTR_ERR(event);
        if (IS_ERR(event))
                goto err_put_context;
@@ -4663,6 +4767,60 @@ err_put_context:
        return err;
 }
 
+/**
+ * perf_event_create_kernel_counter - create and install an event from kernel code
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu in which the counter is bound
+ * @pid: task to profile
+ * @callback: callback handed to perf_event_alloc(), which stores it in the
+ *            new event
+ *
+ * The event is allocated without a group leader or a parent, installed in
+ * the context found for (@pid, @cpu) and left without a file (->filp is
+ * NULL). The calling task becomes its owner: a reference is taken on
+ * current and the event is added to current's perf_event_list.
+ *
+ * Returns the new event, or an ERR_PTR() value when the context lookup or
+ * the allocation fails.
+ */
+struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+                                pid_t pid, perf_callback_t callback)
+{
+       struct perf_event *event;
+       struct perf_event_context *ctx;
+       int err;
+
+       /*
+        * Get the target context (task or percpu):
+        */
+
+       ctx = find_get_context(pid, cpu);
+       if (IS_ERR(ctx)) {
+               err = PTR_ERR(ctx);
+               goto err_exit;
+       }
+
+       event = perf_event_alloc(attr, cpu, ctx, NULL,
+                                    NULL, callback, GFP_KERNEL);
+       if (IS_ERR(event)) {
+               err = PTR_ERR(event);
+               goto err_put_context;
+       }
+
+       event->filp = NULL;
+       WARN_ON_ONCE(ctx->parent_ctx);
+       mutex_lock(&ctx->mutex);
+       perf_install_in_context(ctx, event, cpu);
+       ++ctx->generation;
+       mutex_unlock(&ctx->mutex);
+
+       event->owner = current;
+       get_task_struct(current);
+       mutex_lock(&current->perf_event_mutex);
+       list_add_tail(&event->owner_entry, &current->perf_event_list);
+       mutex_unlock(&current->perf_event_mutex);
+
+       return event;
+
+ err_put_context:
+       put_ctx(ctx);
+ err_exit:
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
 /*
  * inherit a event from parent task to child task:
  */
@@ -4688,7 +4846,7 @@ inherit_event(struct perf_event *parent_event,
        child_event = perf_event_alloc(&parent_event->attr,
                                           parent_event->cpu, child_ctx,
                                           group_leader, parent_event,
-                                          GFP_KERNEL);
+                                          NULL, GFP_KERNEL);
        if (IS_ERR(child_event))
                return child_event;
        get_ctx(child_ctx);
@@ -4706,6 +4864,8 @@ inherit_event(struct perf_event *parent_event,
        if (parent_event->attr.freq)
                child_event->hw.sample_period = parent_event->hw.sample_period;
 
+       child_event->overflow_handler = parent_event->overflow_handler;
+
        /*
         * Link it up in the child's context:
         */
@@ -4795,7 +4955,6 @@ __perf_event_exit_task(struct perf_event *child_event,
 {
        struct perf_event *parent_event;
 
-       update_event_times(child_event);
        perf_event_remove_from_context(child_event);
 
        parent_event = child_event->parent;
@@ -4847,6 +5006,7 @@ void perf_event_exit_task(struct task_struct *child)
         * the events from it.
         */
        unclone_ctx(child_ctx);
+       update_context_time(child_ctx);
        spin_unlock_irqrestore(&child_ctx->lock, flags);
 
        /*
index 6705320784fd2b07a518c4d2bb47520d0b342aa8..93e72e5feae63bc0e9b6bb9e6da3ba3d57b8fcb4 100644 (file)
@@ -27,7 +27,8 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
-#include <trace/events/sched.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/signal.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -834,7 +835,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
        struct sigqueue *q;
        int override_rlimit;
 
-       trace_sched_signal_send(sig, t);
+       trace_signal_generate(sig, info, t);
 
        assert_spin_locked(&t->sighand->siglock);
 
@@ -896,12 +897,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
                        break;
                }
        } else if (!is_si_special(info)) {
-               if (sig >= SIGRTMIN && info->si_code != SI_USER)
-               /*
-                * Queue overflow, abort.  We may abort if the signal was rt
-                * and sent by user using something other than kill().
-                */
+               if (sig >= SIGRTMIN && info->si_code != SI_USER) {
+                       /*
+                        * Queue overflow, abort.  We may abort if the
+                        * signal was rt and sent by user using something
+                        * other than kill().
+                        */
+                       trace_signal_overflow_fail(sig, group, info);
                        return -EAGAIN;
+               } else {
+                       /*
+                        * This is a silent loss of information.  We still
+                        * send the signal, but the *info bits are lost.
+                        */
+                       trace_signal_lose_info(sig, group, info);
+               }
        }
 
 out_set:
@@ -1839,6 +1849,9 @@ relock:
                        ka = &sighand->action[signr-1];
                }
 
+               /* Trace actually delivered signals. */
+               trace_signal_deliver(signr, info, ka);
+
                if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
                        continue;
                if (ka->sa.sa_handler != SIG_DFL) {
index f05671609a897dba01bd32641e301d418e490849..d006554888dc68752a8813e5f6063379c2d98f47 100644 (file)
@@ -339,6 +339,27 @@ config POWER_TRACER
          power management decisions, specifically the C-state and P-state
          behavior.
 
+config KSYM_TRACER
+       bool "Trace read and write access on kernel memory locations"
+       depends on HAVE_HW_BREAKPOINT
+       select TRACING
+       help
+         This tracer helps find read and write operations on any given kernel
+         symbol, i.e. any symbol listed in /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+       bool "Profile all kernel memory accesses on 'watched' variables"
+       depends on KSYM_TRACER
+       help
+         This tracer profiles kernel accesses on variables watched through the
+         ksym tracer ftrace plugin. Depending upon the hardware, all read
+         and write operations on kernel variables can be monitored for
+         accesses.
+
+         The results will be displayed in:
+         /debugfs/tracing/profile_ksym
+
+         Say N if unsure.
 
 config STACK_TRACER
        bool "Trace max stack"
index edc3a3cca1a16cbd4199a0b8035550f60f7ba593..cd9ecd89ec7714d34f16fd541beabd9ce0e504d2 100644 (file)
@@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 
 libftrace-y := ftrace.o
index b4e4212e66d7d6905d835c425387aad291d6eb30..1d7f4830a80d93dd2b3c77a837ad18c5123819fc 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/ftrace.h>
 #include <trace/boot.h>
 #include <linux/kmemtrace.h>
+#include <linux/hw_breakpoint.h>
 
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
        TRACE_KMEM_ALLOC,
        TRACE_KMEM_FREE,
        TRACE_BLK,
+       TRACE_KSYM,
 
        __TRACE_LAST_TYPE,
 };
@@ -98,7 +100,7 @@ struct syscall_trace_enter {
 struct syscall_trace_exit {
        struct trace_entry      ent;
        int                     nr;
-       unsigned long           ret;
+       long                    ret;
 };
 
 struct kprobe_trace_entry {
@@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void);
                          TRACE_KMEM_ALLOC);    \
                IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
                          TRACE_KMEM_FREE);     \
+               IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
                __ftrace_bad_type();                                    \
        } while (0)
 
@@ -387,6 +390,8 @@ int register_tracer(struct tracer *type);
 void unregister_tracer(struct tracer *type);
 int is_tracing_stopped(void);
 
+extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
                                         struct trace_array *tr);
 extern int trace_selftest_startup_hw_branches(struct tracer *trace,
                                              struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+                                        struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
index ead3d724599d2d701b29014c1eb575529162d4f9..c16a08f399df53e9d9728d11e039f485b72e4986 100644 (file)
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
        F_printk("type:%u call_site:%lx ptr:%p",
                 __entry->type_id, __entry->call_site, __entry->ptr)
 );
+
+/*
+ * TRACE_KSYM entry: instruction pointer, breakpoint type, a task command
+ * name of up to TASK_COMM_LEN bytes and the watched address (printed as a
+ * symbol through %pS).
+ */
+FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
+
+       TRACE_KSYM,
+
+       F_STRUCT(
+               __field(        unsigned long,  ip                        )
+               __field(        unsigned char,  type                      )
+               __array(        char         ,  cmd,       TASK_COMM_LEN  )
+               __field(        unsigned long,  addr                      )
+       ),
+
+       F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
+               (void *)__entry->ip, (unsigned int)__entry->type,
+               (void *)__entry->addr,  __entry->cmd)
+);
index e0d351b01f5ac9bacf230cf077cb1e5cbb82489a..d9c60f80aa0d20958c647a86310b29701b4bcdcc 100644 (file)
@@ -9,31 +9,33 @@
 #include "trace.h"
 
 
-struct perf_trace_buf *perf_trace_buf;
+char *perf_trace_buf;
 EXPORT_SYMBOL_GPL(perf_trace_buf);
 
-struct perf_trace_buf *perf_trace_buf_nmi;
+char *perf_trace_buf_nmi;
 EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
 
+typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
+
 /* Count the events in use (per event id, not per instance) */
 static int     total_profile_count;
 
 static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 {
-       struct perf_trace_buf *buf;
+       char *buf;
        int ret = -ENOMEM;
 
        if (atomic_inc_return(&event->profile_count))
                return 0;
 
        if (!total_profile_count) {
-               buf = alloc_percpu(struct perf_trace_buf);
+               buf = (char *)alloc_percpu(perf_trace_t);
                if (!buf)
                        goto fail_buf;
 
                rcu_assign_pointer(perf_trace_buf, buf);
 
-               buf = alloc_percpu(struct perf_trace_buf);
+               buf = (char *)alloc_percpu(perf_trace_t);
                if (!buf)
                        goto fail_buf_nmi;
 
@@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id)
 
 static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
-       struct perf_trace_buf *buf, *nmi_buf;
+       char *buf, *nmi_buf;
 
        if (!atomic_add_negative(-1, &event->profile_count))
                return;
index 3696476f307d05636e84afaf4eff560d5ab23780..aff5f80b59b8785cffe69091aa48a225d700add8 100644 (file)
@@ -243,7 +243,11 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
                ret = snprintf(buf, n, "@0x%p", ff->data);
        else if (ff->func == fetch_symbol) {
                struct symbol_cache *sc = ff->data;
-               ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
+               if (sc->offset)
+                       ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
+                                       sc->offset);
+               else
+                       ret = snprintf(buf, n, "@%s", sc->symbol);
        } else if (ff->func == fetch_retvalue)
                ret = snprintf(buf, n, "$retval");
        else if (ff->func == fetch_stack_address)
@@ -479,7 +483,8 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
        return ret;
 }
 
-static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
+/* Recursive argument parser */
+static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
 {
        int ret = 0;
        unsigned long param;
@@ -539,7 +544,7 @@ static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
                        if (!id)
                                return -ENOMEM;
                        id->offset = offset;
-                       ret = parse_probe_arg(arg, &id->orig, is_return);
+                       ret = __parse_probe_arg(arg, &id->orig, is_return);
                        if (ret)
                                kfree(id);
                        else {
@@ -556,6 +561,16 @@ static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
        return ret;
 }
 
+/*
+ * Front end of the recursive parser: arguments longer than MAX_ARGSTR_LEN
+ * are reported and refused with -ENOSPC, everything else is handed to
+ * __parse_probe_arg() unchanged.
+ */
+static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
+{
+       if (strlen(arg) <= MAX_ARGSTR_LEN)
+               return __parse_probe_arg(arg, ff, is_return);
+
+       pr_info("Argument is too long.: %s\n",  arg);
+       return -ENOSPC;
+}
+
 /* Return 1 if name is reserved or already used by another argument */
 static int conflict_field_name(const char *name,
                               struct probe_arg *args, int narg)
@@ -694,20 +709,23 @@ static int create_trace_probe(int argc, char **argv)
                }
 
                tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
-
-               /* Parse fetch argument */
-               if (strlen(arg) > MAX_ARGSTR_LEN) {
-                       pr_info("Argument%d(%s) is too long.\n", i, arg);
-                       ret = -ENOSPC;
+               if (!tp->args[i].name) {
+                       pr_info("Failed to allocate argument%d name '%s'.\n",
+                               i, argv[i]);
+                       ret = -ENOMEM;
                        goto error;
                }
+
+               /* Parse fetch argument */
                ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
                if (ret) {
                        pr_info("Parse error at argument%d. (%d)\n", i, ret);
+                       kfree(tp->args[i].name);
                        goto error;
                }
+
+               tp->nr_args++;
        }
-       tp->nr_args = i;
 
        ret = register_trace_probe(tp);
        if (ret)
@@ -758,12 +776,14 @@ static int probes_seq_show(struct seq_file *m, void *v)
        char buf[MAX_ARGSTR_LEN + 1];
 
        seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
-       seq_printf(m, ":%s", tp->call.name);
+       seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
 
-       if (tp->symbol)
+       if (!tp->symbol)
+               seq_printf(m, " 0x%p", tp->rp.kp.addr);
+       else if (tp->rp.kp.offset)
                seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
        else
-               seq_printf(m, " 0x%p", tp->rp.kp.addr);
+               seq_printf(m, " %s", probe_symbol(tp));
 
        for (i = 0; i < tp->nr_args; i++) {
                ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
@@ -1208,11 +1228,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
        struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
        struct ftrace_event_call *call = &tp->call;
        struct kprobe_trace_entry *entry;
-       struct perf_trace_buf *trace_buf;
        struct trace_entry *ent;
        int size, __size, i, pc, __cpu;
        unsigned long irq_flags;
+       char *trace_buf;
        char *raw_data;
+       int rctx;
 
        pc = preempt_count();
        __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1227,6 +1248,11 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
         * This also protects the rcu read side
         */
        local_irq_save(irq_flags);
+
+       rctx = perf_swevent_get_recursion_context();
+       if (rctx < 0)
+               goto end_recursion;
+
        __cpu = smp_processor_id();
 
        if (in_nmi())
@@ -1237,18 +1263,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
        if (!trace_buf)
                goto end;
 
-       trace_buf = per_cpu_ptr(trace_buf, __cpu);
-
-       if (trace_buf->recursion++)
-               goto end_recursion;
-
-       /*
-        * Make recursion update visible before entering perf_tp_event
-        * so that we protect from perf recursions.
-        */
-       barrier();
-
-       raw_data = trace_buf->buf;
+       raw_data = per_cpu_ptr(trace_buf, __cpu);
 
        /* Zero dead bytes from alignment to avoid buffer leak to userspace */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -1263,9 +1278,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
                entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
        perf_tp_event(call->id, entry->ip, 1, entry, size);
 
-end_recursion:
-       trace_buf->recursion--;
 end:
+       perf_swevent_put_recursion_context(rctx);
+end_recursion:
        local_irq_restore(irq_flags);
 
        return 0;
@@ -1278,11 +1293,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
        struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
        struct ftrace_event_call *call = &tp->call;
        struct kretprobe_trace_entry *entry;
-       struct perf_trace_buf *trace_buf;
        struct trace_entry *ent;
        int size, __size, i, pc, __cpu;
        unsigned long irq_flags;
+       char *trace_buf;
        char *raw_data;
+       int rctx;
 
        pc = preempt_count();
        __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1297,6 +1313,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
         * This also protects the rcu read side
         */
        local_irq_save(irq_flags);
+
+       rctx = perf_swevent_get_recursion_context();
+       if (rctx < 0)
+               goto end_recursion;
+
        __cpu = smp_processor_id();
 
        if (in_nmi())
@@ -1307,18 +1328,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
        if (!trace_buf)
                goto end;
 
-       trace_buf = per_cpu_ptr(trace_buf, __cpu);
-
-       if (trace_buf->recursion++)
-               goto end_recursion;
-
-       /*
-        * Make recursion update visible before entering perf_tp_event
-        * so that we protect from perf recursions.
-        */
-       barrier();
-
-       raw_data = trace_buf->buf;
+       raw_data = per_cpu_ptr(trace_buf, __cpu);
 
        /* Zero dead bytes from alignment to avoid buffer leak to userspace */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -1334,9 +1344,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
                entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
        perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
 
-end_recursion:
-       trace_buf->recursion--;
 end:
+       perf_swevent_put_recursion_context(rctx);
+end_recursion:
        local_irq_restore(irq_flags);
 
        return 0;
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644 (file)
index 0000000..ddfa0fd
--- /dev/null
@@ -0,0 +1,550 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+#include <linux/hw_breakpoint.h>
+#include <asm/hw_breakpoint.h>
+
+/*
+ * For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HBP_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+
+/* One watched kernel symbol; entries are linked on ksym_filter_head. */
+struct trace_ksym {
+       /* value returned by register_wide_hw_breakpoint() for this entry */
+       struct perf_event       **ksym_hbp;
+       /* breakpoint attributes; bp_addr, bp_type and bp_len are filled in */
+       struct perf_event_attr  attr;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+       /* number of hits, incremented by ksym_collect_stats() */
+       unsigned long           counter;
+#endif
+       /* linkage on ksym_filter_head (added and removed with RCU helpers) */
+       struct hlist_node       ksym_hlist;
+};
+
+static struct trace_array *ksym_trace_array;
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+static DEFINE_MUTEX(ksym_tracer_mutex);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+
+/*
+ * Increment the hit counter of the first filter entry whose breakpoint
+ * address equals @hbp_hit_addr and whose counter does not exceed
+ * MAX_UL_INT. The list is walked under rcu_read_lock().
+ */
+void ksym_collect_stats(unsigned long hbp_hit_addr)
+{
+       struct trace_ksym *ksym;
+       struct hlist_node *pos;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(ksym, pos, &ksym_filter_head, ksym_hlist) {
+               if (ksym->attr.bp_addr != hbp_hit_addr)
+                       continue;
+               if (ksym->counter > MAX_UL_INT)
+                       continue;
+
+               ksym->counter++;
+               break;
+       }
+       rcu_read_unlock();
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+/*
+ * Breakpoint callback registered through register_wide_hw_breakpoint().
+ * @hbp:  the breakpoint event that was hit
+ * @data: register state for the hit, used as a struct pt_regs pointer
+ *
+ * Records one TRACE_KSYM entry in the ksym trace array's ring buffer and,
+ * with CONFIG_PROFILE_KSYM_TRACER, updates the per-symbol hit counter.
+ * Does nothing while ksym_tracing_enabled is zero.
+ */
+void ksym_hbp_handler(struct perf_event *hbp, void *data)
+{
+       struct ring_buffer_event *event;
+       struct ksym_trace_entry *entry;
+       struct pt_regs *regs = data;
+       struct ring_buffer *buffer;
+       int pc;
+
+       if (!ksym_tracing_enabled)
+               return;
+
+       buffer = ksym_trace_array->buffer;
+
+       pc = preempt_count();
+
+       event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
+                                                       sizeof(*entry), 0, pc);
+       /* No room could be reserved: the hit is dropped (and not counted) */
+       if (!event)
+               return;
+
+       entry           = ring_buffer_event_data(event);
+       entry->ip       = instruction_pointer(regs);
+       entry->type     = hw_breakpoint_type(hbp);
+       entry->addr     = hw_breakpoint_addr(hbp);
+       strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+       ksym_collect_stats(hw_breakpoint_addr(hbp));
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+       trace_buffer_unlock_commit(buffer, event, 0, pc);
+}
+
+/*
+ * Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * r-- : Set Read Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ * @str must hold at least KSYM_TRACER_OP_LEN characters; the caller checks
+ * the length before calling.
+ *
+ * Returns the HW_BREAKPOINT_* mask for a set request, 0 for the clear
+ * request "---", and -EINVAL for anything else: a character that is
+ * neither the letter belonging to its position nor '-', or a combination
+ * involving the execute bit.
+ */
+static int ksym_trace_get_access_type(char *str)
+{
+       int access = 0;
+
+       /* Every position holds either its own letter or '-' */
+       if (str[0] == 'r')
+               access |= HW_BREAKPOINT_R;
+       else if (str[0] != '-')
+               return -EINVAL;
+
+       if (str[1] == 'w')
+               access |= HW_BREAKPOINT_W;
+       else if (str[1] != '-')
+               return -EINVAL;
+
+       if (str[2] == 'x')
+               access |= HW_BREAKPOINT_X;
+       else if (str[2] != '-')
+               return -EINVAL;
+
+       /* "---": clear request; callers treat an op of 0 as "remove" */
+       if (!access)
+               return 0;
+
+       switch (access) {
+       case HW_BREAKPOINT_R:
+       case HW_BREAKPOINT_W:
+       case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+               return access;
+       default:
+               return -EINVAL;
+       }
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ *    i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ *    <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ *
+ * @input_string is split in place at the first ':'. On return *@ksymname
+ * points at the symbol part inside it and *@addr holds the result of
+ * kallsyms_lookup_name() for that symbol.
+ *
+ * Returns whatever ksym_trace_get_access_type() returns for the operator
+ * part, or -EINVAL when the request is rejected by rules (1), (2) or (5).
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+                                                       unsigned long *addr)
+{
+       *ksymname = strsep(&input_string, ":");
+       *addr = kallsyms_lookup_name(*ksymname);
+
+       /* Check for malformed request: (2), (1) and (5) */
+       if ((!input_string) ||
+           (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
+           (*addr == 0))
+               return -EINVAL;
+
+       return ksym_trace_get_access_type(input_string);
+}
+
+/*
+ * Create a filter entry for @addr with access type @op, register a wide
+ * hardware breakpoint for it and link it on ksym_filter_head.
+ * @ksymname is not used.
+ *
+ * Returns 0 on success, -ENOSPC when KSYM_TRACER_MAX entries already exist,
+ * -ENOMEM when the entry cannot be allocated, or the error reported by
+ * register_wide_hw_breakpoint().
+ */
+int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+       struct trace_ksym *ksym;
+       int err;
+
+       if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+               printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+               " new requests for tracing can be accepted now.\n",
+                       KSYM_TRACER_MAX);
+               return -ENOSPC;
+       }
+
+       ksym = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+       if (!ksym)
+               return -ENOMEM;
+
+       hw_breakpoint_init(&ksym->attr);
+
+       ksym->attr.bp_addr = addr;
+       ksym->attr.bp_type = op;
+       ksym->attr.bp_len = HW_BREAKPOINT_LEN_4;
+
+       ksym->ksym_hbp = register_wide_hw_breakpoint(&ksym->attr,
+                                       ksym_hbp_handler);
+
+       if (!IS_ERR(ksym->ksym_hbp)) {
+               hlist_add_head_rcu(&(ksym->ksym_hlist), &ksym_filter_head);
+               ksym_filter_entry_count++;
+               return 0;
+       }
+
+       /* Registration failed: report it and release the unused entry */
+       err = PTR_ERR(ksym->ksym_hbp);
+       printk(KERN_INFO "ksym_tracer request failed. Try again"
+                               " later!!\n");
+       kfree(ksym);
+
+       return err;
+}
+
+/*
+ * Read handler of the filter file: prints one "<symbol>:<op>" line per
+ * entry on ksym_filter_head into a temporary trace_seq and copies the
+ * requested window of it to user space.
+ */
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+                                               size_t count, loff_t *ppos)
+{
+       struct trace_ksym *ksym;
+       struct hlist_node *pos;
+       struct trace_seq *seq;
+       ssize_t nread;
+       int ok;
+
+       seq = kmalloc(sizeof(*seq), GFP_KERNEL);
+       if (!seq)
+               return -ENOMEM;
+       trace_seq_init(seq);
+
+       mutex_lock(&ksym_tracer_mutex);
+
+       hlist_for_each_entry(ksym, pos, &ksym_filter_head, ksym_hlist) {
+               ok = trace_seq_printf(seq, "%pS:", (void *)ksym->attr.bp_addr);
+
+               /* Any other type leaves the line without an operator */
+               switch (ksym->attr.bp_type) {
+               case HW_BREAKPOINT_R:
+                       ok = trace_seq_puts(seq, "r--\n");
+                       break;
+               case HW_BREAKPOINT_W:
+                       ok = trace_seq_puts(seq, "-w-\n");
+                       break;
+               case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+                       ok = trace_seq_puts(seq, "rw-\n");
+                       break;
+               default:
+                       break;
+               }
+               WARN_ON_ONCE(!ok);
+       }
+
+       nread = simple_read_from_buffer(ubuf, count, ppos,
+                                       seq->buffer, seq->len);
+
+       mutex_unlock(&ksym_tracer_mutex);
+
+       kfree(seq);
+
+       return nread;
+}
+
+/*
+ * Remove every filter entry: unregister its breakpoint, unlink it from
+ * ksym_filter_head and free it. Takes ksym_tracer_mutex itself, so it must
+ * not be called with that mutex held.
+ */
+static void __ksym_trace_reset(void)
+{
+       struct trace_ksym *entry;
+       struct hlist_node *node, *node1;
+
+       mutex_lock(&ksym_tracer_mutex);
+       hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+                                                               ksym_hlist) {
+               unregister_wide_hw_breakpoint(entry->ksym_hbp);
+               ksym_filter_entry_count--;
+               hlist_del_rcu(&(entry->ksym_hlist));
+               /*
+                * ksym_collect_stats() walks the list under rcu_read_lock();
+                * wait for such readers before freeing the entry.
+                */
+               synchronize_rcu();
+               kfree(entry);
+       }
+       mutex_unlock(&ksym_tracer_mutex);
+}
+
+/*
+ * Write handler of the filter file.
+ *
+ * Accepts either a reset request (empty string, "0" or "*:---"), which
+ * removes every entry, or a "<ksym_name>:<op>" request. For a symbol that
+ * is already watched, a different <op> re-registers the breakpoint with
+ * the new type (or drops the entry when <op> is 0 or registration fails)
+ * and the same <op> is rejected as a duplicate. For a new symbol an entry
+ * is created through process_new_ksym_entry().
+ *
+ * Returns @count on success or a negative errno.
+ */
+static ssize_t ksym_trace_filter_write(struct file *file,
+                                       const char __user *buffer,
+                                               size_t count, loff_t *ppos)
+{
+       struct trace_ksym *entry;
+       struct hlist_node *node;
+       char *buf, *input_string, *ksymname = NULL;
+       unsigned long ksym_addr = 0;
+       int ret, op, changed = 0;
+
+       buf = kzalloc(count + 1, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       if (copy_from_user(buf, buffer, count)) {
+               kfree(buf);
+               return -EFAULT;
+       }
+       buf[count] = '\0';
+
+       /*
+        * strstrip() trims the tail in place but skips leading whitespace
+        * only through the pointer it returns. Parse from that pointer and
+        * keep 'buf' for kfree().
+        */
+       input_string = strstrip(buf);
+
+       /*
+        * Clear all breakpoints if:
+        * 1: echo > ksym_trace_filter
+        * 2: echo 0 > ksym_trace_filter
+        * 3: echo "*:---" > ksym_trace_filter
+        */
+       if (!input_string[0] || !strcmp(input_string, "0") ||
+           !strcmp(input_string, "*:---")) {
+               __ksym_trace_reset();
+               kfree(buf);
+               return count;
+       }
+
+       ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+       if (ret < 0) {
+               kfree(buf);
+               return ret;
+       }
+
+       mutex_lock(&ksym_tracer_mutex);
+
+       ret = -EINVAL;
+       hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+               if (entry->attr.bp_addr == ksym_addr) {
+                       /* Check for malformed request: (6) */
+                       if (entry->attr.bp_type != op)
+                               changed = 1;
+                       else
+                               goto out;
+                       break;
+               }
+       }
+       if (changed) {
+               /* 'entry' is the matching element the loop stopped on */
+               unregister_wide_hw_breakpoint(entry->ksym_hbp);
+               entry->attr.bp_type = op;
+               ret = 0;
+               if (op > 0) {
+                       entry->ksym_hbp =
+                               register_wide_hw_breakpoint(&entry->attr,
+                                       ksym_hbp_handler);
+                       if (IS_ERR(entry->ksym_hbp))
+                               ret = PTR_ERR(entry->ksym_hbp);
+                       else
+                               goto out;
+               }
+               /* Error or "symbol:---" case: drop it */
+               ksym_filter_entry_count--;
+               hlist_del_rcu(&(entry->ksym_hlist));
+               synchronize_rcu();
+               kfree(entry);
+               goto out;
+       } else {
+               /* Check for malformed request: (4) */
+               if (op == 0)
+                       goto out;
+               ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+       }
+out:
+       mutex_unlock(&ksym_tracer_mutex);
+
+       kfree(buf);
+
+       if (!ret)
+               ret = count;
+       return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+       .open           = tracing_open_generic,
+       .read           = ksym_trace_filter_read,
+       .write          = ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+       ksym_tracing_enabled = 0;
+       __ksym_trace_reset();
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+       int cpu, ret = 0;
+
+       for_each_online_cpu(cpu)
+               tracing_reset(tr, cpu);
+       ksym_tracing_enabled = 1;
+       ksym_trace_array = tr;
+
+       return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+       seq_puts(m,
+                "#       TASK-PID   CPU#      Symbol                    "
+                "Type    Function\n");
+       seq_puts(m,
+                "#          |        |          |                       "
+                " |         |\n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+       struct trace_entry *entry = iter->ent;
+       struct trace_seq *s = &iter->seq;
+       struct ksym_trace_entry *field;
+       char str[KSYM_SYMBOL_LEN];
+       int ret;
+
+       if (entry->type != TRACE_KSYM)
+               return TRACE_TYPE_UNHANDLED;
+
+       trace_assign_type(field, entry);
+
+       ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
+                               entry->pid, iter->cpu, (char *)field->addr);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       switch (field->type) {
+       case HW_BREAKPOINT_R:
+               ret = trace_seq_printf(s, " R  ");
+               break;
+       case HW_BREAKPOINT_W:
+               ret = trace_seq_printf(s, " W  ");
+               break;
+       case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+               ret = trace_seq_printf(s, " RW ");
+               break;
+       default:
+               return TRACE_TYPE_PARTIAL_LINE;
+       }
+
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       sprint_symbol(str, field->ip);
+       ret = trace_seq_printf(s, "%s\n", str);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+       .name           = "ksym_tracer",
+       .init           = ksym_trace_init,
+       .reset          = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+       .selftest       = trace_selftest_startup_ksym,
+#endif
+       .print_header   = ksym_trace_print_header,
+       .print_line     = ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+       struct dentry *d_tracer;
+       struct dentry *entry;
+
+       d_tracer = tracing_init_dentry();
+       ksym_filter_entry_count = 0;
+
+       entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
+                                   NULL, &ksym_tracing_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs "
+                          "'ksym_trace_filter' file\n");
+
+       return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+       seq_puts(m, "  Access Type ");
+       seq_puts(m, "  Symbol                                       Counter\n");
+       seq_puts(m, "  ----------- ");
+       seq_puts(m, "  ------                                       -------\n");
+       return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+       struct hlist_node *stat = v;
+       struct trace_ksym *entry;
+       int access_type = 0;
+       char fn_name[KSYM_NAME_LEN];
+
+       entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+       access_type = entry->attr.bp_type;
+
+       switch (access_type) {
+       case HW_BREAKPOINT_R:
+               seq_puts(m, "  R           ");
+               break;
+       case HW_BREAKPOINT_W:
+               seq_puts(m, "  W           ");
+               break;
+       case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+               seq_puts(m, "  RW          ");
+               break;
+       default:
+               seq_puts(m, "  NA          ");
+       }
+
+       if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
+               seq_printf(m, "  %-36s", fn_name);
+       else
+               seq_printf(m, "  %-36s", "<NA>");
+       seq_printf(m, " %15lu\n", entry->counter);
+
+       return 0;
+}
+
+static void *ksym_tracer_stat_start(struct tracer_stat *trace)
+{
+       return ksym_filter_head.first;
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+       struct hlist_node *stat = v;
+
+       return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+       .name = "ksym_tracer",
+       .stat_start = ksym_tracer_stat_start,
+       .stat_next = ksym_tracer_stat_next,
+       .stat_headers = ksym_tracer_stat_headers,
+       .stat_show = ksym_tracer_stat_show
+};
+
+__init static int ksym_tracer_stat_init(void)
+{
+       int ret;
+
+       ret = register_stat_tracer(&ksym_tracer_stats);
+       if (ret) {
+               printk(KERN_WARNING "Warning: could not register "
+                                   "ksym tracer stats\n");
+               return 1;
+       }
+
+       return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
index d2cdbabb4eadd4b0780950b46e9e4deda92198be..dc98309e839a7ca63ff20b05786e87d51e350c92 100644 (file)
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
        case TRACE_GRAPH_ENT:
        case TRACE_GRAPH_RET:
        case TRACE_HW_BRANCHES:
+       case TRACE_KSYM:
                return 1;
        }
        return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
        return ret;
 }
 #endif /* CONFIG_HW_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+static int ksym_selftest_dummy;
+
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+       unsigned long count;
+       int ret;
+
+       /* start the tracing */
+       ret = tracer_init(trace, tr);
+       if (ret) {
+               warn_failed_init_tracer(trace, ret);
+               return ret;
+       }
+
+       ksym_selftest_dummy = 0;
+       /* Register the read-write tracing request */
+
+       ret = process_new_ksym_entry("ksym_selftest_dummy",
+                                    HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+                                       (unsigned long)(&ksym_selftest_dummy));
+
+       if (ret < 0) {
+               printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+               goto ret_path;
+       }
+       /* Perform a read and a write operation over the dummy variable to
+        * trigger the tracer
+        */
+       if (ksym_selftest_dummy == 0)
+               ksym_selftest_dummy++;
+
+       /* stop the tracing. */
+       tracing_stop();
+       /* check the trace buffer */
+       ret = trace_test_buffer(tr, &count);
+       trace->reset(tr);
+       tracing_start();
+
+       /* read & write operations - one each is performed on the dummy variable
+        * triggering two entries in the trace buffer
+        */
+       if (!ret && count != 2) {
+               printk(KERN_CONT "Ksym tracer startup test failed");
+               ret = -1;
+       }
+
+ret_path:
+       return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
+
index 51213b0aa81b2dc48d0a5a6f84effb5a68da1a7b..57501d90096abbba5ee3a87de54476a794ba176e 100644 (file)
@@ -51,32 +51,6 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
        return syscalls_metadata[nr];
 }
 
-int syscall_name_to_nr(char *name)
-{
-       int i;
-
-       if (!syscalls_metadata)
-               return -1;
-
-       for (i = 0; i < NR_syscalls; i++) {
-               if (syscalls_metadata[i]) {
-                       if (!strcmp(syscalls_metadata[i]->name, name))
-                               return i;
-               }
-       }
-       return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
-       syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
-{
-       syscalls_metadata[num]->exit_id = id;
-}
-
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
 {
@@ -93,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
        if (!entry)
                goto end;
 
-       if (entry->enter_id != ent->type) {
+       if (entry->enter_event->id != ent->type) {
                WARN_ON_ONCE(1);
                goto end;
        }
@@ -148,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
                return TRACE_TYPE_HANDLED;
        }
 
-       if (entry->exit_id != ent->type) {
+       if (entry->exit_event->id != ent->type) {
                WARN_ON_ONCE(1);
                return TRACE_TYPE_UNHANDLED;
        }
@@ -172,18 +146,11 @@ extern char *__bad_type_size(void);
 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
        int i;
-       int nr;
        int ret;
-       struct syscall_metadata *entry;
+       struct syscall_metadata *entry = call->data;
        struct syscall_trace_enter trace;
        int offset = offsetof(struct syscall_trace_enter, args);
 
-       nr = syscall_name_to_nr(call->data);
-       entry = syscall_nr_to_meta(nr);
-
-       if (!entry)
-               return 0;
-
        ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
                               "\tsigned:%u;\n",
                               SYSCALL_FIELD(int, nr));
@@ -245,22 +212,19 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
 int syscall_enter_define_fields(struct ftrace_event_call *call)
 {
        struct syscall_trace_enter trace;
-       struct syscall_metadata *meta;
+       struct syscall_metadata *meta = call->data;
        int ret;
-       int nr;
        int i;
        int offset = offsetof(typeof(trace), args);
 
-       nr = syscall_name_to_nr(call->data);
-       meta = syscall_nr_to_meta(nr);
-
-       if (!meta)
-               return 0;
-
        ret = trace_define_common_fields(call);
        if (ret)
                return ret;
 
+       ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+       if (ret)
+               return ret;
+
        for (i = 0; i < meta->nb_args; i++) {
                ret = trace_define_field(call, meta->types[i],
                                         meta->args[i], offset,
@@ -281,6 +245,10 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
        if (ret)
                return ret;
 
+       ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+       if (ret)
+               return ret;
+
        ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
                                 FILTER_OTHER);
 
@@ -308,8 +276,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
 
        size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
 
-       event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
-                                                 size, 0, 0);
+       event = trace_current_buffer_lock_reserve(&buffer,
+                       sys_data->enter_event->id, size, 0, 0);
        if (!event)
                return;
 
@@ -340,8 +308,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
        if (!sys_data)
                return;
 
-       event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
-                               sizeof(*entry), 0, 0);
+       event = trace_current_buffer_lock_reserve(&buffer,
+                       sys_data->exit_event->id, sizeof(*entry), 0, 0);
        if (!event)
                return;
 
@@ -358,10 +326,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
        int ret = 0;
        int num;
-       char *name;
 
-       name = (char *)call->data;
-       num = syscall_name_to_nr(name);
+       num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
@@ -381,10 +347,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
        int num;
-       char *name;
 
-       name = (char *)call->data;
-       num = syscall_name_to_nr(name);
+       num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
@@ -399,10 +363,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
        int ret = 0;
        int num;
-       char *name;
 
-       name = call->data;
-       num = syscall_name_to_nr(name);
+       num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
@@ -422,10 +384,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
        int num;
-       char *name;
 
-       name = call->data;
-       num = syscall_name_to_nr(name);
+       num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
@@ -436,13 +396,17 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
        mutex_unlock(&syscall_trace_lock);
 }
 
-struct trace_event event_syscall_enter = {
-       .trace                  = print_syscall_enter,
-};
+int init_syscall_trace(struct ftrace_event_call *call)
+{
+       int id;
 
-struct trace_event event_syscall_exit = {
-       .trace                  = print_syscall_exit,
-};
+       id = register_ftrace_event(call->event);
+       if (!id)
+               return -ENODEV;
+       call->id = id;
+       INIT_LIST_HEAD(&call->fields);
+       return 0;
+}
 
 int __init init_ftrace_syscalls(void)
 {
@@ -460,6 +424,10 @@ int __init init_ftrace_syscalls(void)
        for (i = 0; i < NR_syscalls; i++) {
                addr = arch_syscall_addr(i);
                meta = find_syscall_meta(addr);
+               if (!meta)
+                       continue;
+
+               meta->syscall_nr = i;
                syscalls_metadata[i] = meta;
        }
 
@@ -477,11 +445,12 @@ static int sys_prof_refcount_exit;
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
        struct syscall_metadata *sys_data;
-       struct perf_trace_buf *trace_buf;
        struct syscall_trace_enter *rec;
        unsigned long flags;
+       char *trace_buf;
        char *raw_data;
        int syscall_nr;
+       int rctx;
        int size;
        int cpu;
 
@@ -505,54 +474,42 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
        /* Protect the per cpu buffer, begin the rcu read side */
        local_irq_save(flags);
 
+       rctx = perf_swevent_get_recursion_context();
+       if (rctx < 0)
+               goto end_recursion;
+
        cpu = smp_processor_id();
 
-       if (in_nmi())
-               trace_buf = rcu_dereference(perf_trace_buf_nmi);
-       else
-               trace_buf = rcu_dereference(perf_trace_buf);
+       trace_buf = rcu_dereference(perf_trace_buf);
 
        if (!trace_buf)
                goto end;
 
-       trace_buf = per_cpu_ptr(trace_buf, cpu);
-
-       if (trace_buf->recursion++)
-               goto end_recursion;
-
-       /*
-        * Make recursion update visible before entering perf_tp_event
-        * so that we protect from perf recursions.
-        */
-       barrier();
-
-       raw_data = trace_buf->buf;
+       raw_data = per_cpu_ptr(trace_buf, cpu);
 
        /* zero the dead bytes from align to not leak stack to user */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
 
        rec = (struct syscall_trace_enter *) raw_data;
        tracing_generic_entry_update(&rec->ent, 0, 0);
-       rec->ent.type = sys_data->enter_id;
+       rec->ent.type = sys_data->enter_event->id;
        rec->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
                               (unsigned long *)&rec->args);
-       perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+       perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
 
-end_recursion:
-       trace_buf->recursion--;
 end:
+       perf_swevent_put_recursion_context(rctx);
+end_recursion:
        local_irq_restore(flags);
 }
 
-int reg_prof_syscall_enter(char *name)
+int prof_sysenter_enable(struct ftrace_event_call *call)
 {
        int ret = 0;
        int num;
 
-       num = syscall_name_to_nr(name);
-       if (num < 0 || num >= NR_syscalls)
-               return -ENOSYS;
+       num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
        mutex_lock(&syscall_trace_lock);
        if (!sys_prof_refcount_enter)
@@ -568,13 +525,11 @@ int reg_prof_syscall_enter(char *name)
        return ret;
 }
 
-void unreg_prof_syscall_enter(char *name)
+void prof_sysenter_disable(struct ftrace_event_call *call)
 {
        int num;
 
-       num = syscall_name_to_nr(name);
-       if (num < 0 || num >= NR_syscalls)
-               return;
+       num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
        mutex_lock(&syscall_trace_lock);
        sys_prof_refcount_enter--;
@@ -588,10 +543,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
        struct syscall_metadata *sys_data;
        struct syscall_trace_exit *rec;
-       struct perf_trace_buf *trace_buf;
        unsigned long flags;
        int syscall_nr;
+       char *trace_buf;
        char *raw_data;
+       int rctx;
        int size;
        int cpu;
 
@@ -617,28 +573,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 
        /* Protect the per cpu buffer, begin the rcu read side */
        local_irq_save(flags);
+
+       rctx = perf_swevent_get_recursion_context();
+       if (rctx < 0)
+               goto end_recursion;
+
        cpu = smp_processor_id();
 
-       if (in_nmi())
-               trace_buf = rcu_dereference(perf_trace_buf_nmi);
-       else
-               trace_buf = rcu_dereference(perf_trace_buf);
+       trace_buf = rcu_dereference(perf_trace_buf);
 
        if (!trace_buf)
                goto end;
 
-       trace_buf = per_cpu_ptr(trace_buf, cpu);
-
-       if (trace_buf->recursion++)
-               goto end_recursion;
-
-       /*
-        * Make recursion update visible before entering perf_tp_event
-        * so that we protect from perf recursions.
-        */
-       barrier();
-
-       raw_data = trace_buf->buf;
+       raw_data = per_cpu_ptr(trace_buf, cpu);
 
        /* zero the dead bytes from align to not leak stack to user */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -646,26 +593,24 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
        rec = (struct syscall_trace_exit *)raw_data;
 
        tracing_generic_entry_update(&rec->ent, 0, 0);
-       rec->ent.type = sys_data->exit_id;
+       rec->ent.type = sys_data->exit_event->id;
        rec->nr = syscall_nr;
        rec->ret = syscall_get_return_value(current, regs);
 
-       perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+       perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
 
-end_recursion:
-       trace_buf->recursion--;
 end:
+       perf_swevent_put_recursion_context(rctx);
+end_recursion:
        local_irq_restore(flags);
 }
 
-int reg_prof_syscall_exit(char *name)
+int prof_sysexit_enable(struct ftrace_event_call *call)
 {
        int ret = 0;
        int num;
 
-       num = syscall_name_to_nr(name);
-       if (num < 0 || num >= NR_syscalls)
-               return -ENOSYS;
+       num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
        mutex_lock(&syscall_trace_lock);
        if (!sys_prof_refcount_exit)
@@ -681,13 +626,11 @@ int reg_prof_syscall_exit(char *name)
        return ret;
 }
 
-void unreg_prof_syscall_exit(char *name)
+void prof_sysexit_disable(struct ftrace_event_call *call)
 {
        int num;
 
-       num = syscall_name_to_nr(name);
-       if (num < 0 || num >= NR_syscalls)
-               return;
+       num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
        mutex_lock(&syscall_trace_lock);
        sys_prof_refcount_exit--;
index b92bde3c6a89e7fdb09721f1e29a5aa3105c604a..e4be84ac3d381c217982cdffc71ec92fa7cd6c9b 100644 (file)
@@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES
        default m
        depends on SAMPLE_KPROBES && KRETPROBES
 
+config SAMPLE_HW_BREAKPOINT
+       tristate "Build kernel hardware breakpoint examples -- loadable module only"
+       depends on HAVE_HW_BREAKPOINT && m
+       help
+         This builds kernel hardware breakpoint example modules.
+
 endif # SAMPLES
 
index 43343a03b1f4a8cbeca3b1371802135366bbaea5..0f15e6d77fd641a516f3ed53ba2d16a3a273108d 100644 (file)
@@ -1,3 +1,4 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)  += kobject/ kprobes/ tracepoints/ trace_events/
+obj-$(CONFIG_SAMPLES)  += kobject/ kprobes/ tracepoints/ trace_events/ \
+                          hw_breakpoint/
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644 (file)
index 0000000..0f5c31c
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644 (file)
index 0000000..2952550
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * usage: insmod data_breakpoint.ko ksym=<ksym_name>
+ *
+ * This file is a kernel module that places a breakpoint over ksym_name kernel
+ * variable using Hardware Breakpoint register. The corresponding handler which
+ * prints a backtrace is invoked every time a write operation is performed on
+ * that variable.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ *
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>      /* Needed by all modules */
+#include <linux/kernel.h>      /* Needed for KERN_INFO */
+#include <linux/init.h>                /* Needed for the macros */
+#include <linux/kallsyms.h>
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+
+struct perf_event **sample_hbp;
+
+static char ksym_name[KSYM_NAME_LEN] = "pid_max";
+module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
+MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
+                       " write operations on the kernel symbol");
+
+static void sample_hbp_handler(struct perf_event *temp, void *data)
+{
+       printk(KERN_INFO "%s value is changed\n", ksym_name);
+       dump_stack();
+       printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
+}
+
+static int __init hw_break_module_init(void)
+{
+       int ret;
+       DEFINE_BREAKPOINT_ATTR(attr);
+
+       attr.bp_addr = kallsyms_lookup_name(ksym_name);
+       attr.bp_len = HW_BREAKPOINT_LEN_4;
+       attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+
+       sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
+       if (IS_ERR(sample_hbp)) {
+               ret = PTR_ERR(sample_hbp);
+               goto fail;
+       }
+
+       printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name);
+
+       return 0;
+
+fail:
+       printk(KERN_INFO "Breakpoint registration failed\n");
+
+       return ret;
+}
+
+static void __exit hw_break_module_exit(void)
+{
+       unregister_wide_hw_breakpoint(sample_hbp);
+       printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
+}
+
+module_init(hw_break_module_init);
+module_exit(hw_break_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("K.Prasad");
+MODULE_DESCRIPTION("ksym breakpoint");
index ea9f8a58678f3d9baaa0e073c9233584874700fa..241310e59cd6e6db33a24761a74cfda3254969f1 100755 (executable)
@@ -1852,10 +1852,17 @@ sub tracepoint_munge($) {
        my $tracepointname = 0;
        my $tracepointargs = 0;
 
-       if($prototype =~ m/TRACE_EVENT\((.*?),/) {
+       if ($prototype =~ m/TRACE_EVENT\((.*?),/) {
                $tracepointname = $1;
        }
-       if($prototype =~ m/TP_PROTO\((.*?)\)/) {
+       if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) {
+               $tracepointname = $1;
+       }
+       if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) {
+               $tracepointname = $2;
+       }
+       $tracepointname =~ s/^\s+//; #strip leading whitespace
+       if ($prototype =~ m/TP_PROTO\((.*?)\)/) {
                $tracepointargs = $1;
        }
        if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
@@ -1920,7 +1927,9 @@ sub process_state3_function($$) {
        if ($prototype =~ /SYSCALL_DEFINE/) {
                syscall_munge();
        }
-       if ($prototype =~ /TRACE_EVENT/) {
+       if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ ||
+           $prototype =~ /DEFINE_SINGLE_EVENT/)
+       {
                tracepoint_munge($file);
        }
        dump_function($prototype, $file);
index 0854f110bf7f79a7de7617bd37a3c75e06d11430..fe08660ce0bd05841869ed59f762eceed7c089d7 100644 (file)
@@ -12,6 +12,7 @@ perf*.1
 perf*.xml
 perf*.html
 common-cmds.h
+perf.data
 tags
 TAGS
 cscope*
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644 (file)
index 0000000..44b0ce3
--- /dev/null
@@ -0,0 +1,44 @@
+perf-kmem(1)
+==============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory (slab) properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record} [<options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf kmem:
+
+  'perf kmem record <command>' to record the kmem events
+  of an arbitrary workload.
+
+  'perf kmem' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+       Select the input file (default: perf.data)
+
+--stat=<caller|alloc>::
+       Select per callsite or per allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+       Sort the output (default: frag,hit,bytes)
+
+-l <num>::
+--line=<num>::
+       Print n lines only
+
+--raw-ip::
+       Print raw ip instead of symbol
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
index 0ff23de9e4539599acdba5a8ade8325117e75caa..fc46c0b40f6e431bd5124885544b5827a96234c8 100644 (file)
@@ -26,11 +26,19 @@ OPTIONS
 
 -e::
 --event=::
-       Select the PMU event. Selection can be a symbolic event name
-       (use 'perf list' to list all events) or a raw PMU
-       event (eventsel+umask) in the form of rNNN where NNN is a
-       hexadecimal event descriptor.
+       Select the PMU event. Selection can be:
 
+        - a symbolic event name        (use 'perf list' to list all events)
+
+        - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+         hexadecimal event descriptor.
+
+        - a hardware breakpoint event in the form of '\mem:addr[:access]'
+          where addr is the address in memory you want to break in.
+          Access is the memory access type (read, write, execute); it can
+          be passed as follows: '\mem:addr[:[r][w][x]]'.
+          If you want to profile read-write accesses at 0x1000, just set
+          'mem:0x1000:rw'.
 -a::
         System-wide collection.
 
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt
new file mode 100644 (file)
index 0000000..c5f55f4
--- /dev/null
@@ -0,0 +1,219 @@
+perf-trace-perl(1)
+==================
+
+NAME
+----
+perf-trace-perl - Process trace data with a Perl script
+
+SYNOPSIS
+--------
+[verse]
+'perf trace' [-s [lang]:script[.ext] ]
+
+DESCRIPTION
+-----------
+
+This perf trace option is used to process perf trace data using perf's
+built-in Perl interpreter.  It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Perl script, if any.
+
+STARTER SCRIPTS
+---------------
+
+You can avoid reading the rest of this document by running 'perf trace
+-g perl' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/perl for typical examples showing how to
+do basic things like aggregate event data, print results, etc.  Also,
+the check-perf-trace.pl script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf trace is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace.  If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_unhandled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above options: -c 1 says to sample every event, -a to enable
+system-wide collection, -M to multiplex the output, and -R to collect
+raw samples.
+
+The format file for the sched_wakeup event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+        field:unsigned short common_type;
+        field:unsigned char common_flags;
+        field:unsigned char common_preempt_count;
+        field:int common_pid;
+        field:int common_lock_depth;
+
+        field:char comm[TASK_COMM_LEN];
+        field:pid_t pid;
+        field:int prio;
+        field:int success;
+        field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+sub sched::sched_wakeup
+{
+   my ($event_name, $context, $common_cpu, $common_secs,
+       $common_nsecs, $common_pid, $common_comm,
+       $comm, $pid, $prio, $success, $target_cpu) = @_;
+}
+----
+
+The handler function takes the form subsystem::event_name.
+
+The $common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ $event_name               the name of the event as text
+ $context                  an opaque 'cookie' used in calls back into perf
+ $common_cpu               the cpu the event occurred on
+ $common_secs              the secs portion of the event timestamp
+ $common_nsecs             the nsecs portion of the event timestamp
+ $common_pid               the pid of the current task
+ $common_comm              the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script.  The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf trace Perl script should start by setting up a Perl module
+search path and 'use'ing a few support modules (see module
+descriptions below):
+
+----
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+ use lib "./Perf-Trace-Util/lib";
+ use Perf::Trace::Core;
+ use Perf::Trace::Context;
+ use Perf::Trace::Util;
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+ sub trace_begin
+ {
+ }
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+sub trace_end
+{
+}
+----
+
+*trace_unhandled*, if defined, is called for any event that
+ doesn't have a handler explicitly defined for it.  The standard set
+ of common arguments are passed into it:
+
+----
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs,
+        $common_nsecs, $common_pid, $common_comm) = @_;
+}
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf trace Perl modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various Perf::Trace::* Perl modules.  To use the functions and
+variables from the given module, add the corresponding 'use
+Perf::Trace::XXX' line to your perf trace script.
+
+Perf::Trace::Core Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields.  These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+  flag_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the flag field $field_name of event $event_name
+  symbol_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the symbolic field $field_name of event $event_name
+
+Perf::Trace::Context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+Perf::Trace::Context defines a set of functions that can be used to
+access this data in the context of the current event.  Each of these
+functions expects a $context variable, which is the same as the
+$context variable passed into every event handler as the second
+argument.
+
+ common_pc($context) - returns common_preempt_count for the current event
+ common_flags($context) - returns common_flags for the current event
+ common_lock_depth($context) - returns common_lock_depth for the current event
+
+Perf::Trace::Util Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Various utility functions for use with perf trace:
+
+  nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
+  nsecs_secs($nsecs) - returns whole secs portion given nsecs
+  nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
+  nsecs_str($nsecs) - returns printable string in the form secs.nsecs
+  avg($total, $n) - returns average given a sum and a total number of values
+
+SEE ALSO
+--------
+linkperf:perf-trace[1]
index 41ed75398ca98efd9211d367e4e64f5de6da06e0..07065efa60e09b9a8ca3559583b7cdfd63ac2c0a 100644 (file)
@@ -20,6 +20,15 @@ OPTIONS
 --dump-raw-trace=::
         Display verbose dump of the trace data.
 
+-s::
+--script=::
+        Process trace data with the given script ([lang]:script[.ext]).
+
+-g::
+--gen-script=::
+        Generate perf-trace.[ext] starter script for given language,
+        using current perf.data.
+
 SEE ALSO
 --------
-linkperf:perf-record[1]
+linkperf:perf-record[1], linkperf:perf-trace-perl[1]
index 5d1a8b0dff8fef2bd3166046b50420c85187186a..23ec66098bdc45c6c81ca9559fd16dc46f3b5ffb 100644 (file)
@@ -2,6 +2,7 @@
 all::
 
 # Define V=1 to have a more verbose compile.
+# Define V=2 to have an even more verbose compile.
 #
 # Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
 # or vsnprintf() return -1 instead of number of characters which would
@@ -147,6 +148,8 @@ all::
 # broken, or spawning external process is slower than built-in grep perf has).
 #
 # Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
 
 PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
        @$(SHELL_PATH) util/PERF-VERSION-GEN
@@ -159,20 +162,6 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
 uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
 uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
-#
-# Add -m32 for cross-builds:
-#
-ifdef NO_64BIT
-  MBITS := -m32
-else
-  #
-  # If we're on a 64-bit kernel, use -m64:
-  #
-  ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
-    MBITS := -m64
-  endif
-endif
-
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
 #
@@ -209,7 +198,7 @@ ifndef PERF_DEBUG
   CFLAGS_OPTIMIZE = -O6
 endif
 
-CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS)
+CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 EXTLIBS = -lpthread -lrt -lelf -lm
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
@@ -261,7 +250,7 @@ PTHREAD_LIBS = -lpthread
 # explicitly what architecture to check for. Fix this up for yours..
 SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
 
-ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null >/dev/null 2>&1 && echo y"), y)
+ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null "$(QUIET_STDERR)" && echo y"), y)
   CFLAGS := $(CFLAGS) -fstack-protector-all
 endif
 
@@ -380,6 +369,8 @@ LIB_H += util/sort.h
 LIB_H += util/hist.h
 LIB_H += util/thread.h
 LIB_H += util/data_map.h
+LIB_H += util/probe-finder.h
+LIB_H += util/probe-event.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
@@ -418,10 +409,12 @@ LIB_OBJS += util/thread.o
 LIB_OBJS += util/trace-event-parse.o
 LIB_OBJS += util/trace-event-read.o
 LIB_OBJS += util/trace-event-info.o
+LIB_OBJS += util/trace-event-perl.o
 LIB_OBJS += util/svghelper.o
 LIB_OBJS += util/sort.o
 LIB_OBJS += util/hist.o
 LIB_OBJS += util/data_map.o
+LIB_OBJS += util/probe-event.o
 
 BUILTIN_OBJS += builtin-annotate.o
 
@@ -430,6 +423,7 @@ BUILTIN_OBJS += builtin-bench.o
 # Benchmark modules
 BUILTIN_OBJS += bench/sched-messaging.o
 BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o
 
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-sched.o
@@ -442,9 +436,15 @@ BUILTIN_OBJS += builtin-timechart.o
 BUILTIN_OBJS += builtin-top.o
 BUILTIN_OBJS += builtin-trace.o
 BUILTIN_OBJS += builtin-probe.o
+BUILTIN_OBJS += builtin-kmem.o
 
 PERFLIBS = $(LIB_FILE)
 
+ifeq ($(V), 2)
+       QUIET_STDERR = ">/dev/null"
+else
+       QUIET_STDERR = ">/dev/null 2>&1"
+endif
 #
 # Platform specific tweaks
 #
@@ -472,49 +472,58 @@ ifeq ($(uname_S),Darwin)
        PTHREAD_LIBS =
 endif
 
-ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y)
-ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y)
-       msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]);
+ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+       msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
 endif
 
-       ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y)
+       ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
                BASIC_CFLAGS += -DLIBELF_NO_MMAP
        endif
 else
        msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
 endif
 
-ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y)
+ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
        msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231);
        BASIC_CFLAGS += -DNO_LIBDWARF
 else
        EXTLIBS += -lelf -ldwarf
-       LIB_H += util/probe-finder.h
        LIB_OBJS += util/probe-finder.o
 endif
 
+PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
+PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+
+ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
+       BASIC_CFLAGS += -DNO_LIBPERL
+else
+       ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
+       LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o
+endif
+
 ifdef NO_DEMANGLE
        BASIC_CFLAGS += -DNO_DEMANGLE
 else
-       has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd > /dev/null 2>&1 && echo y")
+       has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y")
 
        ifeq ($(has_bfd),y)
                EXTLIBS += -lbfd
        else
-               has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty > /dev/null 2>&1 && echo y")
+               has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y")
                ifeq ($(has_bfd_iberty),y)
                        EXTLIBS += -lbfd -liberty
                else
-                       has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
+                       has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y")
                        ifeq ($(has_bfd_iberty_z),y)
                                EXTLIBS += -lbfd -liberty -lz
                        else
-                               has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty > /dev/null 2>&1 && echo y")
+                               has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y")
                                ifeq ($(has_cplus_demangle),y)
                                        EXTLIBS += -liberty
                                        BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
                                else
-                                       msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
+                                       msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
                                        BASIC_CFLAGS += -DNO_DEMANGLE
                                endif
                        endif
@@ -864,6 +873,12 @@ util/hweight.o: ../../lib/hweight.c PERF-CFLAGS
 util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS
        $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
+util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS
+       $(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+
+scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS
+       $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+
 perf-%$X: %.o $(PERFLIBS)
        $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
@@ -971,6 +986,13 @@ export perfexec_instdir
 install: all
        $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
        $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
+       $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
+       $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+       $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
+       $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
+       $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+       $(INSTALL) scripts/perl/Perf-Trace-Util/Makefile.PL -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util'
+       $(INSTALL) scripts/perl/Perf-Trace-Util/README -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util'
 ifdef BUILT_INS
        $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
        $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -1056,7 +1078,7 @@ distclean: clean
 #      $(RM) configure
 
 clean:
-       $(RM) *.o */*.o $(LIB_FILE)
+       $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE)
        $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
        $(RM) $(TEST_PROGRAMS)
        $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
index 9fbd8d745fa1ee933ecbf14028bd199f4874d92d..f7781c6267c06098fa2ad1376e60c69fe2497ffb 100644 (file)
@@ -3,6 +3,7 @@
 
 extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
 extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
 #define BENCH_FORMAT_DEFAULT           0
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644 (file)
index 0000000..8977317
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+/* Step between display units when scaling bytes/sec to KB, MB and GB. */
+#define K 1024
+
+/* Option values; the initializers are the defaults used when no flag is given. */
+static const char      *length_str     = "1MB";
+static const char      *routine        = "default";
+static int             use_clock       = 0;
+/* Descriptor returned by sys_perf_event_open() in init_clock(). */
+static int             clock_fd;
+
+/* Command-line options handed to parse_options() by bench_mem_memcpy(). */
+static const struct option options[] = {
+       OPT_STRING('l', "length", &length_str, "1MB",
+                   "Specify length of memory to copy. "
+                   "available unit: B, MB, GB (upper and lower)"),
+       OPT_STRING('r', "routine", &routine, "default",
+                   "Specify routine to copy"),
+       OPT_BOOLEAN('c', "clock", &use_clock,
+                   "Use CPU clock for measuring"),
+       OPT_END()
+};
+
+/* One selectable copy implementation: lookup name, help text, function. */
+struct routine {
+       const char *name;
+       const char *desc;
+       void * (*fn)(void *dst, const void *src, size_t len);
+};
+
+/*
+ * Table of copy routines selectable with --routine.  The entry whose name
+ * is NULL terminates the table; the lookup loops in bench_mem_memcpy()
+ * stop on it.
+ */
+struct routine routines[] = {
+       { "default",
+         "Default memcpy() provided by glibc",
+         memcpy },
+       { NULL,
+         NULL,
+         NULL   }
+};
+
+/* Usage strings passed to parse_options(). */
+static const char * const bench_mem_memcpy_usage[] = {
+       "perf bench mem memcpy <options>",
+       NULL
+};
+
+/* Counter description used by init_clock(): hardware CPU cycles. */
+static struct perf_event_attr clock_attr = {
+       .type           = PERF_TYPE_HARDWARE,
+       .config         = PERF_COUNT_HW_CPU_CYCLES
+};
+
+/*
+ * Open the counter described by clock_attr, passing getpid() as the target,
+ * and store its descriptor in clock_fd.  ENOSYS is reported through die()
+ * with a hint about missing kernel support; any other failure trips
+ * BUG_ON().
+ */
+static void init_clock(void)
+{
+       clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+       if (clock_fd >= 0)
+               return;
+
+       if (errno == ENOSYS)
+               die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+       else
+               BUG_ON(clock_fd < 0);
+}
+
+/*
+ * Read the current value of the counter behind clock_fd.
+ * A read that does not return exactly sizeof(u64) bytes trips BUG_ON().
+ */
+static u64 get_clock(void)
+{
+       u64 cycles;
+       int ret = read(clock_fd, &cycles, sizeof(u64));
+
+       BUG_ON(ret != sizeof(u64));
+       return cycles;
+}
+
+/* Convert a struct timeval to seconds, keeping the microseconds as a fraction. */
+static double timeval2double(struct timeval *ts)
+{
+       double whole = (double)ts->tv_sec;
+       double frac = (double)ts->tv_usec / (double)1000000;
+
+       return whole + frac;
+}
+
+/*
+ * Entry point of 'perf bench mem memcpy'.
+ *
+ * Parses the command-line options, copies a buffer of the requested length
+ * once with the selected routine, and prints either the counter ticks per
+ * byte (--clock, counter configured by clock_attr) or the throughput in
+ * bytes per second derived from gettimeofday().
+ *
+ * Returns 0 on success and 1 when the length is invalid or the routine
+ * name is unknown.  Allocation failures and an unknown output format are
+ * reported through die().
+ */
+int bench_mem_memcpy(int argc, const char **argv,
+                    const char *prefix __used)
+{
+       int i;
+       void *dst, *src;
+       size_t length;
+       double bps = 0.0;
+       struct timeval tv_start, tv_end, tv_diff;
+       u64 clock_start, clock_end, clock_diff;
+
+       clock_start = clock_end = clock_diff = 0ULL;
+       argc = parse_options(argc, argv, options,
+                            bench_mem_memcpy_usage, 0);
+
+       tv_diff.tv_sec = 0;
+       tv_diff.tv_usec = 0;
+       length = (size_t)perf_atoll((char *)length_str);
+
+       /* The signed view also rejects a negative result from perf_atoll(). */
+       if ((s64)length <= 0) {
+               fprintf(stderr, "Invalid length:%s\n", length_str);
+               return 1;
+       }
+
+       /* Look the requested routine up by name; the NULL name ends the table. */
+       for (i = 0; routines[i].name; i++) {
+               if (!strcmp(routines[i].name, routine))
+                       break;
+       }
+       if (!routines[i].name) {
+               printf("Unknown routine:%s\n", routine);
+               printf("Available routines...\n");
+               for (i = 0; routines[i].name; i++) {
+                       printf("\t%s ... %s\n",
+                              routines[i].name, routines[i].desc);
+               }
+               return 1;
+       }
+
+       dst = zalloc(length);
+       if (!dst)
+               die("memory allocation failed - maybe length is too large?\n");
+
+       src = zalloc(length);
+       if (!src)
+               die("memory allocation failed - maybe length is too large?\n");
+
+       if (bench_format == BENCH_FORMAT_DEFAULT) {
+               printf("# Copying %s Bytes from %p to %p ...\n\n",
+                      length_str, src, dst);
+       }
+
+       /* Take the start sample immediately before the copy. */
+       if (use_clock) {
+               init_clock();
+               clock_start = get_clock();
+       } else {
+               BUG_ON(gettimeofday(&tv_start, NULL));
+       }
+
+       routines[i].fn(dst, src, length);
+
+       if (use_clock) {
+               clock_end = get_clock();
+               clock_diff = clock_end - clock_start;
+       } else {
+               BUG_ON(gettimeofday(&tv_end, NULL));
+               timersub(&tv_end, &tv_start, &tv_diff);
+               bps = (double)((double)length / timeval2double(&tv_diff));
+       }
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               if (use_clock) {
+                       printf(" %14lf Clock/Byte\n",
+                              (double)clock_diff / (double)length);
+               } else {
+                       /* Scale to the largest unit that keeps the value >= 1. */
+                       if (bps < K)
+                               printf(" %14lf B/Sec\n", bps);
+                       else if (bps < K * K)
+                               printf(" %14lf KB/Sec\n", bps / 1024);
+                       else if (bps < K * K * K)
+                               printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+                       else {
+                               printf(" %14lf GB/Sec\n",
+                                      bps / 1024 / 1024 / 1024);
+                       }
+               }
+               break;
+       case BENCH_FORMAT_SIMPLE:
+               if (use_clock) {
+                       printf("%14lf\n",
+                              (double)clock_diff / (double)length);
+               } else
+                       printf("%lf\n", bps);
+               break;
+       default:
+               /* reaching this means there's some disaster: */
+               die("unknown format: %d\n", bench_format);
+               break;
+       }
+
+       /* Release the copy buffers now that the measurement is reported. */
+       free(src);
+       free(dst);
+
+       return 0;
+}
index 77d50a6d68027cf9a176882ff4a28dc720a95711..0bf2e8f9af5776538237fa7e842c33e070b4dbe0 100644 (file)
 #include "perf.h"
 #include "util/debug.h"
 
+#include "util/event.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 #include "util/thread.h"
 #include "util/sort.h"
 #include "util/hist.h"
+#include "util/data_map.h"
 
 static char            const *input_name = "perf.data";
 
 static int             force;
-static int             input;
 
 static int             full_paths;
 
 static int             print_line;
 
-static unsigned long   page_size;
-static unsigned long   mmap_window = 32;
-
 struct sym_hist {
        u64             sum;
        u64             ip[0];
@@ -53,6 +51,11 @@ struct sym_priv {
        struct sym_ext  *ext;
 };
 
+static struct symbol_conf symbol_conf = {
+       .priv_size        = sizeof(struct sym_priv),
+       .try_vmlinux_path = true,
+};
+
 static const char *sym_hist_filter;
 
 static int symbol_filter(struct map *map __used, struct symbol *sym)
@@ -118,186 +121,34 @@ static void hist_hit(struct hist_entry *he, u64 ip)
                        h->ip[offset]);
 }
 
-static int hist_entry__add(struct thread *thread, struct map *map,
-                          struct symbol *sym, u64 ip, u64 count, char level)
+static int hist_entry__add(struct addr_location *al, u64 count)
 {
        bool hit;
-       struct hist_entry *he = __hist_entry__add(thread, map, sym, NULL, ip,
-                                                 count, level, &hit);
+       struct hist_entry *he = __hist_entry__add(al, NULL, count, &hit);
        if (he == NULL)
                return -ENOMEM;
-       hist_hit(he, ip);
+       hist_hit(he, al->addr);
        return 0;
 }
 
-static int
-process_sample_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_sample_event(event_t *event)
 {
-       char level;
-       u64 ip = event->ip.ip;
-       struct map *map = NULL;
-       struct symbol *sym = NULL;
-       struct thread *thread = threads__findnew(event->ip.pid);
-
-       dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->header.misc,
-               event->ip.pid,
-               (void *)(long)ip);
-
-       if (thread == NULL) {
+       struct addr_location al;
+
+       dump_printf("(IP, %d): %d: %p\n", event->header.misc,
+                   event->ip.pid, (void *)(long)event->ip.ip);
+
+       if (event__preprocess_sample(event, &al, symbol_filter) < 0) {
                fprintf(stderr, "problem processing %d event, skipping it.\n",
                        event->header.type);
                return -1;
        }
 
-       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
-       if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
-               level = 'k';
-               sym = kernel_maps__find_symbol(ip, &map);
-               dump_printf(" ...... dso: %s\n",
-                           map ? map->dso->long_name : "<not found>");
-       } else if (event->header.misc & PERF_RECORD_MISC_USER) {
-               level = '.';
-               map = thread__find_map(thread, ip);
-               if (map != NULL) {
-got_map:
-                       ip = map->map_ip(map, ip);
-                       sym = map__find_symbol(map, ip, symbol_filter);
-               } else {
-                       /*
-                        * If this is outside of all known maps,
-                        * and is a negative address, try to look it
-                        * up in the kernel dso, as it might be a
-                        * vsyscall or vdso (which executes in user-mode).
-                        *
-                        * XXX This is nasty, we should have a symbol list in
-                        * the "[vdso]" dso, but for now lets use the old
-                        * trick of looking in the whole kernel symbol list.
-                        */
-                       if ((long long)ip < 0) {
-                               map = kernel_map;
-                               goto got_map;
-                       }
-               }
-               dump_printf(" ...... dso: %s\n",
-                           map ? map->dso->long_name : "<not found>");
-       } else {
-               level = 'H';
-               dump_printf(" ...... dso: [hypervisor]\n");
-       }
-
-       if (hist_entry__add(thread, map, sym, ip, 1, level)) {
+       if (hist_entry__add(&al, 1)) {
                fprintf(stderr, "problem incrementing symbol count, "
                                "skipping event\n");
                return -1;
        }
-       total++;
-
-       return 0;
-}
-
-static int
-process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       struct map *map = map__new(&event->mmap, NULL, 0);
-       struct thread *thread = threads__findnew(event->mmap.pid);
-
-       dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->mmap.pid,
-               (void *)(long)event->mmap.start,
-               (void *)(long)event->mmap.len,
-               (void *)(long)event->mmap.pgoff,
-               event->mmap.filename);
-
-       if (thread == NULL || map == NULL) {
-               dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
-               return 0;
-       }
-
-       thread__insert_map(thread, map);
-       total_mmap++;
-
-       return 0;
-}
-
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       struct thread *thread = threads__findnew(event->comm.pid);
-
-       dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->comm.comm, event->comm.pid);
-
-       if (thread == NULL ||
-           thread__set_comm(thread, event->comm.comm)) {
-               dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
-               return -1;
-       }
-       total_comm++;
-
-       return 0;
-}
-
-static int
-process_fork_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       struct thread *thread = threads__findnew(event->fork.pid);
-       struct thread *parent = threads__findnew(event->fork.ppid);
-
-       dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->fork.pid, event->fork.ppid);
-
-       /*
-        * A thread clone will have the same PID for both
-        * parent and child.
-        */
-       if (thread == parent)
-               return 0;
-
-       if (!thread || !parent || thread__fork(thread, parent)) {
-               dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
-               return -1;
-       }
-       total_fork++;
-
-       return 0;
-}
-
-static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       switch (event->header.type) {
-       case PERF_RECORD_SAMPLE:
-               return process_sample_event(event, offset, head);
-
-       case PERF_RECORD_MMAP:
-               return process_mmap_event(event, offset, head);
-
-       case PERF_RECORD_COMM:
-               return process_comm_event(event, offset, head);
-
-       case PERF_RECORD_FORK:
-               return process_fork_event(event, offset, head);
-       /*
-        * We dont process them right now but they are fine:
-        */
-
-       case PERF_RECORD_THROTTLE:
-       case PERF_RECORD_UNTHROTTLE:
-               return 0;
-
-       default:
-               return -1;
-       }
 
        return 0;
 }
@@ -602,115 +453,31 @@ static void find_annotations(void)
        }
 }
 
+/*
+ * Event callbacks registered by __cmd_annotate() through
+ * register_perf_file_handler(). Samples go to the local
+ * process_sample_event(); mmap, comm and fork records are delegated to
+ * the shared event__process_*() helpers.
+ */
+static struct perf_file_handler file_handler = {
+       .process_sample_event   = process_sample_event,
+       .process_mmap_event     = event__process_mmap,
+       .process_comm_event     = event__process_comm,
+       .process_fork_event     = event__process_task,
+};
+
 static int __cmd_annotate(void)
 {
-       int ret, rc = EXIT_FAILURE;
-       unsigned long offset = 0;
-       unsigned long head = 0;
-       struct stat input_stat;
-       event_t *event;
-       uint32_t size;
-       char *buf;
-
-       register_idle_thread();
-
-       input = open(input_name, O_RDONLY);
-       if (input < 0) {
-               perror("failed to open file");
-               exit(-1);
-       }
-
-       ret = fstat(input, &input_stat);
-       if (ret < 0) {
-               perror("failed to stat file");
-               exit(-1);
-       }
-
-       if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
-               fprintf(stderr, "file: %s not owned by current user or root\n", input_name);
-               exit(-1);
-       }
-
-       if (!input_stat.st_size) {
-               fprintf(stderr, "zero-sized file, nothing to do!\n");
-               exit(0);
-       }
-
-       if (load_kernel(symbol_filter) < 0) {
-               perror("failed to load kernel symbols");
-               return EXIT_FAILURE;
-       }
-
-remap:
-       buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-                          MAP_SHARED, input, offset);
-       if (buf == MAP_FAILED) {
-               perror("failed to mmap file");
-               exit(-1);
-       }
-
-more:
-       event = (event_t *)(buf + head);
-
-       size = event->header.size;
-       if (!size)
-               size = 8;
-
-       if (head + event->header.size >= page_size * mmap_window) {
-               unsigned long shift = page_size * (head / page_size);
-               int munmap_ret;
-
-               munmap_ret = munmap(buf, page_size * mmap_window);
-               assert(munmap_ret == 0);
-
-               offset += shift;
-               head -= shift;
-               goto remap;
-       }
-
-       size = event->header.size;
-
-       dump_printf("%p [%p]: event: %d\n",
-                       (void *)(offset + head),
-                       (void *)(long)event->header.size,
-                       event->header.type);
-
-       if (!size || process_event(event, offset, head) < 0) {
-
-               dump_printf("%p [%p]: skipping unknown header type: %d\n",
-                       (void *)(offset + head),
-                       (void *)(long)(event->header.size),
-                       event->header.type);
-
-               total_unknown++;
-
-               /*
-                * assume we lost track of the stream, check alignment, and
-                * increment a single u64 in the hope to catch on again 'soon'.
-                */
-
-               if (unlikely(head & 7))
-                       head &= ~7ULL;
-
-               size = 8;
-       }
-
-       head += size;
-
-       if (offset + head < (unsigned long)input_stat.st_size)
-               goto more;
+       struct perf_header *header;
+       struct thread *idle;
+       int ret;
 
-       rc = EXIT_SUCCESS;
-       close(input);
+       idle = register_idle_thread();
+       register_perf_file_handler(&file_handler);
 
-       dump_printf("      IP events: %10ld\n", total);
-       dump_printf("    mmap events: %10ld\n", total_mmap);
-       dump_printf("    comm events: %10ld\n", total_comm);
-       dump_printf("    fork events: %10ld\n", total_fork);
-       dump_printf(" unknown events: %10ld\n", total_unknown);
+       ret = mmap_dispatch_perf_file(&header, input_name, 0, 0,
+                                     &event__cwdlen, &event__cwd);
+       if (ret)
+               return ret;
 
-       if (dump_trace)
+       if (dump_trace) {
+               event__print_totals();
                return 0;
+       }
 
        if (verbose > 3)
                threads__fprintf(stdout);
@@ -719,11 +486,11 @@ more:
                dsos__fprintf(stdout);
 
        collapse__resort();
-       output__resort(total);
+       output__resort(event__total[0]);
 
        find_annotations();
 
-       return rc;
+       return ret;
 }
 
 static const char * const annotate_usage[] = {
@@ -741,8 +508,9 @@ static const struct option options[] = {
                    "be more verbose (show symbol address, etc)"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
-       OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
-       OPT_BOOLEAN('m', "modules", &modules,
+       OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+                  "file", "vmlinux pathname"),
+       OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
                    "load module symbols - WARNING: use only with -k and LIVE kernel"),
        OPT_BOOLEAN('l', "print-line", &print_line,
                    "print matching source lines (may be slow)"),
@@ -768,9 +536,8 @@ static void setup_sorting(void)
 
 int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 {
-       symbol__init(sizeof(struct sym_priv));
-
-       page_size = getpagesize();
+       if (symbol__init(&symbol_conf) < 0)
+               return -1;
 
        argc = parse_options(argc, argv, options, annotate_usage, 0);
 
index 90c39baae0de0bf9fe58c2ddb94caca8f8ab1556..e043eb83092aa3576a89016f70e8698e36c97e49 100644 (file)
@@ -12,6 +12,7 @@
  *
  * Available subsystem list:
  *  sched ... scheduler and IPC mechanism
+ *  mem   ... memory access performance
  *
  */
 
@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
          NULL                  }
 };
 
+/*
+ * Suites of the "mem" subsystem, referenced from subsystems[] below.
+ * Entries are positional: a name string, a summary string and the suite's
+ * entry function. The table ends with an all-NULL entry.
+ */
+static struct bench_suite mem_suites[] = {
+       { "memcpy",
+         "Simple memory copy in various ways",
+         bench_mem_memcpy },
+       { NULL,
+         NULL,
+         NULL             }
+};
+
 struct bench_subsys {
        const char *name;
        const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
        { "sched",
          "scheduler and IPC mechanism",
          sched_suites },
+       { "mem",
+         "memory access performance",
+         mem_suites },
        { NULL,
          NULL,
-         NULL         }
+         NULL       }
 };
 
 static void dump_suites(int subsys_index)
index 768f9c826312f8e8437e4ecedf5795d7edd674e1..9f810b17c25c58bfb1af1522dc6ec3d4eee85362 100644 (file)
@@ -179,7 +179,7 @@ static void add_man_viewer(const char *name)
 
        while (*p)
                p = &((*p)->next);
-       *p = calloc(1, (sizeof(**p) + len + 1));
+       *p = zalloc(sizeof(**p) + len + 1);
        strncpy((*p)->name, name, len);
 }
 
@@ -194,7 +194,7 @@ static void do_add_man_viewer_info(const char *name,
                                   size_t len,
                                   const char *value)
 {
-       struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1);
+       struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1);
 
        strncpy(new->name, name, len);
        new->info = strdup(value);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
new file mode 100644 (file)
index 0000000..047fef7
--- /dev/null
@@ -0,0 +1,807 @@
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+
+#include "util/parse-options.h"
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+#include "util/data_map.h"
+
+#include <linux/rbtree.h>
+
+struct alloc_stat;
+typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
+
+static char const              *input_name = "perf.data";
+
+static struct perf_header      *header;
+static u64                     sample_type;
+
+static int                     alloc_flag;
+static int                     caller_flag;
+
+static int                     alloc_lines = -1;
+static int                     caller_lines = -1;
+
+static bool                    raw_ip;
+
+static char                    default_sort_order[] = "frag,hit,bytes";
+
+static int                     *cpunode_map;
+static int                     max_cpu_num;
+
+/*
+ * One accumulator record. The same layout serves both statistics trees:
+ * root_alloc_stat is keyed on ptr, root_caller_stat is keyed on call_site.
+ */
+struct alloc_stat {
+       u64     call_site;      /* "call_site" field of the allocation event */
+       u64     ptr;            /* "ptr" field of the allocation event */
+       u64     bytes_req;      /* running sum of the "bytes_req" fields */
+       u64     bytes_alloc;    /* running sum of the "bytes_alloc" fields */
+       u32     hit;            /* number of allocation events folded in */
+       u32     pingpong;       /* frees seen on a CPU other than alloc_cpu */
+
+       short   alloc_cpu;      /* CPU of the last allocation; -1 after a free */
+
+       struct rb_node node;    /* linkage into the stat tree, later the sorted tree */
+};
+
+static struct rb_root root_alloc_stat;
+static struct rb_root root_alloc_sorted;
+static struct rb_root root_caller_stat;
+static struct rb_root root_caller_sorted;
+
+static unsigned long total_requested, total_allocated;
+static unsigned long nr_allocs, nr_cross_allocs;
+
+/*
+ * Overlay for the raw tracepoint payload found at the end of a sample.
+ * Only data[] is read in this file (it is passed to the trace_* and
+ * raw_field_value() helpers); size is presumably the byte length of
+ * data[] - TODO confirm against the record format.
+ */
+struct raw_event_sample {
+       u32 size;
+       char data[0];
+};
+
+#define PATH_SYS_NODE  "/sys/devices/system/node"
+
+/*
+ * Size and allocate cpunode_map[], with every slot preset to -1.
+ *
+ * max_cpu_num is taken from sysfs 'kernel_max' (value + 1 so it can be used
+ * as an array length); when that file cannot be opened a fixed fallback of
+ * 4096 is used. The map is allocated in both cases, so the code that later
+ * indexes cpunode_map[] never sees a NULL pointer. Dies on a parse or
+ * allocation failure.
+ */
+static void init_cpunode_map(void)
+{
+       FILE *fp;
+       int i;
+
+       fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
+       if (!fp) {
+               max_cpu_num = 4096;
+       } else {
+               if (fscanf(fp, "%d", &max_cpu_num) < 1)
+                       die("Failed to read 'kernel_max' from sysfs");
+               fclose(fp);
+               max_cpu_num++;
+       }
+
+       cpunode_map = calloc(max_cpu_num, sizeof(int));
+       if (!cpunode_map)
+               die("calloc");
+       for (i = 0; i < max_cpu_num; i++)
+               cpunode_map[i] = -1;
+}
+
+/*
+ * Fill cpunode_map[] from sysfs: for every PATH_SYS_NODE/node<N> directory,
+ * each cpu<M> entry inside it records cpunode_map[M] = N.
+ *
+ * Silently does nothing when the node directory cannot be opened. CPU
+ * indexes that do not fit the map are ignored instead of written out of
+ * bounds, and both directory handles are closed before returning.
+ */
+static void setup_cpunode_map(void)
+{
+       struct dirent *dent1, *dent2;
+       DIR *dir1, *dir2;
+       unsigned int cpu, mem;
+       char buf[PATH_MAX];
+
+       init_cpunode_map();
+       /* nothing to fill in if the map could not be set up */
+       if (!cpunode_map)
+               return;
+
+       dir1 = opendir(PATH_SYS_NODE);
+       if (!dir1)
+               return;
+
+       while ((dent1 = readdir(dir1)) != NULL) {
+               if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+                       continue;
+
+               snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
+               dir2 = opendir(buf);
+               if (!dir2)
+                       continue;
+
+               while ((dent2 = readdir(dir2)) != NULL) {
+                       if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+                               continue;
+                       /* never index past the end of cpunode_map[] */
+                       if (cpu >= (unsigned int)max_cpu_num)
+                               continue;
+                       cpunode_map[cpu] = mem;
+               }
+               closedir(dir2);
+       }
+       closedir(dir1);
+}
+
+/*
+ * Account one allocation event in the per-pointer tree (root_alloc_stat,
+ * keyed on ptr).
+ *
+ * An existing record for ptr gets its hit count and byte totals bumped;
+ * otherwise a fresh record is allocated and linked in. In both cases the
+ * record's call_site and alloc_cpu are overwritten with the values of this
+ * event. Dies on allocation failure.
+ */
+static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+                             int bytes_req, int bytes_alloc, int cpu)
+{
+       struct rb_node **node = &root_alloc_stat.rb_node;
+       struct rb_node *parent = NULL;
+       struct alloc_stat *data = NULL;
+
+       while (*node) {
+               parent = *node;
+               data = rb_entry(*node, struct alloc_stat, node);
+
+               if (ptr > data->ptr)
+                       node = &(*node)->rb_right;
+               else if (ptr < data->ptr)
+                       node = &(*node)->rb_left;
+               else
+                       break;
+       }
+
+       /* data is the last node visited; it matches only if the walk broke out */
+       if (data && data->ptr == ptr) {
+               data->hit++;
+               data->bytes_req += bytes_req;
+               /* accumulate the allocated size, not the requested size again */
+               data->bytes_alloc += bytes_alloc;
+       } else {
+               data = malloc(sizeof(*data));
+               if (!data)
+                       die("malloc");
+               data->ptr = ptr;
+               data->pingpong = 0;
+               data->hit = 1;
+               data->bytes_req = bytes_req;
+               data->bytes_alloc = bytes_alloc;
+
+               rb_link_node(&data->node, parent, node);
+               rb_insert_color(&data->node, &root_alloc_stat);
+       }
+       data->call_site = call_site;
+       data->alloc_cpu = cpu;
+}
+
+/*
+ * Account one allocation event in the per-call-site tree
+ * (root_caller_stat, keyed on call_site).
+ *
+ * An existing record for call_site gets its hit count and byte totals
+ * bumped; otherwise a fresh record is allocated and linked in. Dies on
+ * allocation failure.
+ */
+static void insert_caller_stat(unsigned long call_site,
+                             int bytes_req, int bytes_alloc)
+{
+       struct rb_node **node = &root_caller_stat.rb_node;
+       struct rb_node *parent = NULL;
+       struct alloc_stat *data = NULL;
+
+       while (*node) {
+               parent = *node;
+               data = rb_entry(*node, struct alloc_stat, node);
+
+               if (call_site > data->call_site)
+                       node = &(*node)->rb_right;
+               else if (call_site < data->call_site)
+                       node = &(*node)->rb_left;
+               else
+                       break;
+       }
+
+       /* data is the last node visited; it matches only if the walk broke out */
+       if (data && data->call_site == call_site) {
+               data->hit++;
+               data->bytes_req += bytes_req;
+               /* accumulate the allocated size, not the requested size again */
+               data->bytes_alloc += bytes_alloc;
+       } else {
+               data = malloc(sizeof(*data));
+               if (!data)
+                       die("malloc");
+               data->call_site = call_site;
+               data->pingpong = 0;
+               data->hit = 1;
+               data->bytes_req = bytes_req;
+               data->bytes_alloc = bytes_alloc;
+
+               rb_link_node(&data->node, parent, node);
+               rb_insert_color(&data->node, &root_caller_stat);
+       }
+}
+
+/*
+ * Handle one allocation tracepoint: read ptr, call_site, bytes_req and
+ * bytes_alloc from the raw payload, feed both statistics trees and update
+ * the global totals.
+ *
+ * When node is non-zero the event also carries a "node" field; it is
+ * compared with the node recorded for cpu in cpunode_map[] to count
+ * cross-node allocations. That comparison is skipped when cpu is outside
+ * the map (process_sample_event() passes -1 if the sample has no CPU
+ * field) or when the map was never allocated.
+ */
+static void process_alloc_event(struct raw_event_sample *raw,
+                               struct event *event,
+                               int cpu,
+                               u64 timestamp __used,
+                               struct thread *thread __used,
+                               int node)
+{
+       unsigned long call_site;
+       unsigned long ptr;
+       int bytes_req;
+       int bytes_alloc;
+       int node1, node2;
+
+       ptr = raw_field_value(event, "ptr", raw->data);
+       call_site = raw_field_value(event, "call_site", raw->data);
+       bytes_req = raw_field_value(event, "bytes_req", raw->data);
+       bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
+
+       insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
+       insert_caller_stat(call_site, bytes_req, bytes_alloc);
+
+       total_requested += bytes_req;
+       total_allocated += bytes_alloc;
+
+       if (node) {
+               node2 = raw_field_value(event, "node", raw->data);
+               /* only index the map with a CPU number it actually covers */
+               if (cpunode_map && cpu >= 0 && cpu < max_cpu_num) {
+                       node1 = cpunode_map[cpu];
+                       if (node1 != node2)
+                               nr_cross_allocs++;
+               }
+       }
+       nr_allocs++;
+}
+
+static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
+static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+
+/*
+ * Look up a record in root using sort_fn as the ordering. A temporary key
+ * built from ptr and call_site is compared against each visited node:
+ * negative goes left, positive goes right, zero is a match.
+ * Returns the matching record, or NULL if there is none.
+ */
+static struct alloc_stat *search_alloc_stat(unsigned long ptr,
+                                           unsigned long call_site,
+                                           struct rb_root *root,
+                                           sort_fn_t sort_fn)
+{
+       struct alloc_stat probe = { .ptr = ptr, .call_site = call_site };
+       struct rb_node *cur;
+
+       for (cur = root->rb_node; cur != NULL; ) {
+               struct alloc_stat *entry;
+               int order;
+
+               entry = rb_entry(cur, struct alloc_stat, node);
+               order = sort_fn(&probe, entry);
+               if (order == 0)
+                       return entry;
+
+               cur = order < 0 ? cur->rb_left : cur->rb_right;
+       }
+
+       return NULL;
+}
+
+/*
+ * Handle one free tracepoint. The freed pointer is looked up in the
+ * per-pointer tree; unknown pointers are ignored. If the free happens on a
+ * CPU other than the one recorded at allocation time, the ping-pong
+ * counter is bumped on both the pointer record and its call-site record.
+ * The pointer record's alloc_cpu is then reset to -1.
+ */
+static void process_free_event(struct raw_event_sample *raw,
+                              struct event *event,
+                              int cpu,
+                              u64 timestamp __used,
+                              struct thread *thread __used)
+{
+       unsigned long freed = raw_field_value(event, "ptr", raw->data);
+       struct alloc_stat *by_ptr;
+
+       by_ptr = search_alloc_stat(freed, 0, &root_alloc_stat, ptr_cmp);
+       if (by_ptr == NULL)
+               return;
+
+       if (by_ptr->alloc_cpu != cpu) {
+               struct alloc_stat *by_site;
+
+               by_ptr->pingpong++;
+
+               by_site = search_alloc_stat(0, by_ptr->call_site,
+                                           &root_caller_stat, callsite_cmp);
+               assert(by_site);
+               by_site->pingpong++;
+       }
+
+       by_ptr->alloc_cpu = -1;
+}
+
+/*
+ * Dispatch one raw tracepoint payload by event name:
+ *   kmalloc, kmem_cache_alloc            -> allocation without node info
+ *   kmalloc_node, kmem_cache_alloc_node  -> allocation with node info
+ *   kfree, kmem_cache_free               -> free
+ * Any other event, or a type id that trace_find_event() does not resolve,
+ * is ignored.
+ */
+static void
+process_raw_event(event_t *raw_event __used, void *more_data,
+                 int cpu, u64 timestamp, struct thread *thread)
+{
+       struct raw_event_sample *raw = more_data;
+       struct event *event;
+       int type;
+
+       type = trace_parse_common_type(raw->data);
+       event = trace_find_event(type);
+       /* a lookup miss must not be dereferenced below */
+       if (!event)
+               return;
+
+       if (!strcmp(event->name, "kmalloc") ||
+           !strcmp(event->name, "kmem_cache_alloc")) {
+               process_alloc_event(raw, event, cpu, timestamp, thread, 0);
+               return;
+       }
+
+       if (!strcmp(event->name, "kmalloc_node") ||
+           !strcmp(event->name, "kmem_cache_alloc_node")) {
+               process_alloc_event(raw, event, cpu, timestamp, thread, 1);
+               return;
+       }
+
+       if (!strcmp(event->name, "kfree") ||
+           !strcmp(event->name, "kmem_cache_free")) {
+               process_free_event(raw, event, cpu, timestamp, thread);
+               return;
+       }
+}
+
+/*
+ * Sample callback installed through file_handler.
+ *
+ * Decodes the optional timestamp, CPU and period fields that follow the
+ * fixed part of the sample, then hands what remains of the payload to
+ * process_raw_event(). Returns 0 on success and -1 when no thread object
+ * could be obtained for the sample's pid.
+ */
+static int process_sample_event(event_t *event)
+{
+       u64 ip = event->ip.ip;
+       /* defaults used when the corresponding field is absent */
+       u64 timestamp = -1;
+       u32 cpu = -1;
+       u64 period = 1;
+       void *more_data = event->ip.__more_data;
+       struct thread *thread = threads__findnew(event->ip.pid);
+
+       /*
+        * The optional fields are consumed from more_data in this fixed
+        * order; each is present only if its bit is set in sample_type.
+        */
+       if (sample_type & PERF_SAMPLE_TIME) {
+               timestamp = *(u64 *)more_data;
+               more_data += sizeof(u64);
+       }
+
+       if (sample_type & PERF_SAMPLE_CPU) {
+               cpu = *(u32 *)more_data;
+               more_data += sizeof(u32);
+               more_data += sizeof(u32); /* reserved */
+       }
+
+       if (sample_type & PERF_SAMPLE_PERIOD) {
+               period = *(u64 *)more_data;
+               more_data += sizeof(u64);
+       }
+
+       dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
+               event->header.misc,
+               event->ip.pid, event->ip.tid,
+               (void *)(long)ip,
+               (long long)period);
+
+       if (thread == NULL) {
+               pr_debug("problem processing %d event, skipping it.\n",
+                        event->header.type);
+               return -1;
+       }
+
+       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+       /* more_data now points past the optional fields, at the raw payload */
+       process_raw_event(event, more_data, cpu, timestamp, thread);
+
+       return 0;
+}
+
+/*
+ * Remember the sample type mask in sample_type and make sure it includes
+ * raw tracepoint data, which this tool cannot work without.
+ * Returns 0 if PERF_SAMPLE_RAW is set, otherwise prints a hint to stderr
+ * and returns -1.
+ */
+static int sample_type_check(u64 type)
+{
+       sample_type = type;
+
+       if (!(sample_type & PERF_SAMPLE_RAW)) {
+               /* end the message with a newline so the prompt is not glued to it */
+               fprintf(stderr,
+                       "No trace sample to read. Did you call perf record "
+                       "without -R?\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Callbacks registered by read_events(). Samples go to the local
+ * process_sample_event(), comm records to the shared event__process_comm(),
+ * and sample_type_check() receives the sample type mask for validation.
+ */
+static struct perf_file_handler file_handler = {
+       .process_sample_event   = process_sample_event,
+       .process_comm_event     = event__process_comm,
+       .sample_type_check      = sample_type_check,
+};
+
+/*
+ * Register the idle thread and this tool's callbacks, then run the whole
+ * input file through mmap_dispatch_perf_file(). Returns that call's result.
+ */
+static int read_events(void)
+{
+       int err;
+
+       register_idle_thread();
+       register_perf_file_handler(&file_handler);
+
+       err = mmap_dispatch_perf_file(&header, input_name, 0, 0,
+                                     &event__cwdlen, &event__cwd);
+       return err;
+}
+
+/*
+ * Percentage of allocated bytes that were not requested:
+ * 100 - 100 * n_req / n_alloc, or 0 when nothing was allocated.
+ */
+static double fragmentation(unsigned long n_req, unsigned long n_alloc)
+{
+       return n_alloc == 0 ? 0.0 : 100.0 - (100.0 * n_req / n_alloc);
+}
+
+/*
+ * Print one result table, walking root in tree order.
+ *
+ * n_lines limits the number of rows: 0 prints none, a positive value
+ * prints at most that many, a negative value prints all of them. A
+ * trailing "..." row is shown only when rows were actually left out.
+ * is_caller selects the first column: call site (resolved to symbol+offset
+ * unless raw_ip is set) or allocation pointer.
+ */
+static void __print_result(struct rb_root *root, int n_lines, int is_caller)
+{
+       struct rb_node *next;
+       bool truncated = false;
+
+       printf("%.102s\n", graph_dotted_line);
+       printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
+       printf(" Total_alloc/Per | Total_req/Per   | Hit   | Ping-pong | Frag\n");
+       printf("%.102s\n", graph_dotted_line);
+
+       next = rb_first(root);
+
+       while (next) {
+               struct alloc_stat *data = rb_entry(next, struct alloc_stat,
+                                                  node);
+               struct symbol *sym = NULL;
+               char buf[BUFSIZ];
+               u64 addr;
+
+               /* row budget used up while entries remain */
+               if (n_lines == 0) {
+                       truncated = true;
+                       break;
+               }
+               if (n_lines > 0)
+                       n_lines--;
+
+               if (is_caller) {
+                       addr = data->call_site;
+                       if (!raw_ip)
+                               sym = thread__find_function(kthread, addr, NULL);
+               } else
+                       addr = data->ptr;
+
+               if (sym != NULL)
+                       snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
+                                addr - sym->start);
+               else
+                       snprintf(buf, sizeof(buf), "%#Lx", addr);
+               printf(" %-34s |", buf);
+
+               /* the "/Per" columns are per-hit averages */
+               printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
+                      (unsigned long long)data->bytes_alloc,
+                      (unsigned long)data->bytes_alloc / data->hit,
+                      (unsigned long long)data->bytes_req,
+                      (unsigned long)data->bytes_req / data->hit,
+                      (unsigned long)data->hit,
+                      (unsigned long)data->pingpong,
+                      fragmentation(data->bytes_req, data->bytes_alloc));
+
+               next = rb_next(next);
+       }
+
+       if (truncated)
+               printf(" ...                                | ...             | ...             | ...    | ...      | ...   \n");
+
+       printf("%.102s\n", graph_dotted_line);
+}
+
+/*
+ * Print the global totals: requested and allocated bytes, their difference,
+ * the overall fragmentation percentage and the cross-CPU allocation count.
+ */
+static void print_summary(void)
+{
+       unsigned long wasted = total_allocated - total_requested;
+       double frag = fragmentation(total_requested, total_allocated);
+
+       printf("\nSUMMARY\n=======\n");
+       printf("Total bytes requested: %lu\n", total_requested);
+       printf("Total bytes allocated: %lu\n", total_allocated);
+       printf("Total bytes wasted on internal fragmentation: %lu\n", wasted);
+       printf("Internal fragmentation: %f%%\n", frag);
+       printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
+}
+
+/*
+ * Print the per-call-site table and/or the per-pointer table, depending on
+ * caller_flag and alloc_flag, followed by the summary.
+ */
+static void print_result(void)
+{
+       if (caller_flag != 0)
+               __print_result(&root_caller_sorted, caller_lines, 1);
+
+       if (alloc_flag != 0)
+               __print_result(&root_alloc_sorted, alloc_lines, 0);
+
+       print_summary();
+}
+
+/* One sort key: its name, its comparator, and linkage into a key list. */
+struct sort_dimension {
+       const char              name[20];       /* matched against the sort token */
+       sort_fn_t               cmp;            /* <0, 0, >0 ordering of two records */
+       struct list_head        list;           /* entry in caller_sort / alloc_sort */
+};
+
+static LIST_HEAD(caller_sort);
+static LIST_HEAD(alloc_sort);
+
+/*
+ * Insert data into the sorted tree rooted at root. At every node the keys
+ * in sort_list are tried in order and the first non-zero comparison
+ * decides: a record comparing greater goes to the left subtree, anything
+ * else (including a tie on all keys) goes to the right.
+ */
+static void sort_insert(struct rb_root *root, struct alloc_stat *data,
+                       struct list_head *sort_list)
+{
+       struct rb_node **link = &root->rb_node;
+       struct rb_node *above = NULL;
+
+       while (*link != NULL) {
+               struct alloc_stat *other;
+               struct sort_dimension *dim;
+               int order = 0;
+
+               above = *link;
+               other = rb_entry(above, struct alloc_stat, node);
+
+               list_for_each_entry(dim, sort_list, list) {
+                       order = dim->cmp(data, other);
+                       if (order != 0)
+                               break;
+               }
+
+               link = order > 0 ? &above->rb_left : &above->rb_right;
+       }
+
+       rb_link_node(&data->node, above, link);
+       rb_insert_color(&data->node, root);
+}
+
+/*
+ * Drain root: repeatedly unlink its first node and re-insert the record
+ * into root_sorted using the keys in sort_list, until root is empty.
+ */
+static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
+                         struct list_head *sort_list)
+{
+       struct rb_node *first;
+
+       while ((first = rb_first(root)) != NULL) {
+               struct alloc_stat *entry;
+
+               rb_erase(first, root);
+               entry = rb_entry(first, struct alloc_stat, node);
+               sort_insert(root_sorted, entry, sort_list);
+       }
+}
+
+/*
+ * Move both statistics trees into their sorted counterparts. The two
+ * passes work on separate trees and key lists.
+ */
+static void sort_result(void)
+{
+       __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
+       __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
+}
+
+/*
+ * Run the report: start the pager, read and account all events, sort the
+ * statistics and print them. Returns 0 on success; if reading the input
+ * fails, that error is returned and no (empty) report is printed.
+ */
+static int __cmd_kmem(void)
+{
+       int err;
+
+       setup_pager();
+
+       err = read_events();
+       if (err)
+               return err;
+
+       sort_result();
+       print_result();
+
+       return 0;
+}
+
+/* NULL-terminated usage text for the kmem subcommand. */
+static const char * const kmem_usage[] = {
+       "perf kmem [<options>] {record}",
+       NULL
+};
+
+/* Order two records by ptr: -1, 0 or 1. */
+static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+       return (l->ptr > r->ptr) - (l->ptr < r->ptr);
+}
+
+/* Sort key "ptr". */
+static struct sort_dimension ptr_sort_dimension = {
+       .cmp    = ptr_cmp,
+       .name   = "ptr",
+};
+
+/* Order two records by call_site: -1, 0 or 1. */
+static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+       return (l->call_site > r->call_site) - (l->call_site < r->call_site);
+}
+
+/* Sort key "callsite". */
+static struct sort_dimension callsite_sort_dimension = {
+       .cmp    = callsite_cmp,
+       .name   = "callsite",
+};
+
+/* Order two records by hit count: -1, 0 or 1. */
+static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+       return (l->hit > r->hit) - (l->hit < r->hit);
+}
+
+/* Sort key "hit". */
+static struct sort_dimension hit_sort_dimension = {
+       .cmp    = hit_cmp,
+       .name   = "hit",
+};
+
+/* Three-way compare of two entries by their bytes_alloc field: -1, 0 or 1. */
+static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+       return l->bytes_alloc < r->bytes_alloc ? -1 :
+              (l->bytes_alloc > r->bytes_alloc ? 1 : 0);
+}
+
+/* Sort key "bytes". */
+static struct sort_dimension bytes_sort_dimension = {
+       .name   = "bytes",
+       .cmp    = bytes_cmp,
+};
+
+/*
+ * Three-way compare of two entries by fragmentation(bytes_req, bytes_alloc):
+ * -1, 0 or 1.  Values that compare neither less nor greater yield 0.
+ */
+static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+       const double lhs = fragmentation(l->bytes_req, l->bytes_alloc);
+       const double rhs = fragmentation(r->bytes_req, r->bytes_alloc);
+
+       return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Sort key "frag". */
+static struct sort_dimension frag_sort_dimension = {
+       .name   = "frag",
+       .cmp    = frag_cmp,
+};
+
+/* Three-way compare of two entries by their pingpong field: -1, 0 or 1. */
+static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+       return (l->pingpong > r->pingpong) - (l->pingpong < r->pingpong);
+}
+
+/* Sort key "pingpong". */
+static struct sort_dimension pingpong_sort_dimension = {
+       .name   = "pingpong",
+       .cmp    = pingpong_cmp,
+};
+
+/*
+ * Every sort dimension that sort_dimension__add() matches a --sort token
+ * against, by comparing the token with each entry's .name.
+ */
+static struct sort_dimension *avail_sorts[] = {
+       &ptr_sort_dimension,
+       &callsite_sort_dimension,
+       &hit_sort_dimension,
+       &bytes_sort_dimension,
+       &frag_sort_dimension,
+       &pingpong_sort_dimension,
+};
+
+/* Number of entries in avail_sorts[], cast to int for use as a loop bound. */
+#define NUM_AVAIL_SORTS        \
+       (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
+
+/*
+ * Append a copy of the sort dimension named @tok to @list.
+ *
+ * The first avail_sorts[] entry whose name equals @tok is duplicated on the
+ * heap and linked at the tail of @list.  Returns 0 on a match and -1 when no
+ * entry has that name; die()s if the copy cannot be allocated.
+ */
+static int sort_dimension__add(const char *tok, struct list_head *list)
+{
+       int idx;
+
+       for (idx = 0; idx < NUM_AVAIL_SORTS; idx++) {
+               struct sort_dimension *entry;
+
+               if (strcmp(avail_sorts[idx]->name, tok) != 0)
+                       continue;
+
+               entry = malloc(sizeof(*entry));
+               if (entry == NULL)
+                       die("malloc");
+               memcpy(entry, avail_sorts[idx], sizeof(*entry));
+               list_add_tail(&entry->list, list);
+               return 0;
+       }
+
+       return -1;
+}
+
+/*
+ * Parse the comma-separated key list @arg and append one sort dimension per
+ * key to @sort_list.
+ *
+ * Returns 0 when every key was recognised, or -1 (after reporting the
+ * offending key) on the first unknown one.  Keys added before the failure
+ * stay on @sort_list.  die()s if @arg cannot be duplicated.
+ */
+static int setup_sorting(struct list_head *sort_list, const char *arg)
+{
+       char *tok;
+       char *str = strdup(arg);
+       /*
+        * strsep() advances the pointer it is given and leaves it NULL once
+        * the string is exhausted, so walk a separate cursor and keep 'str'
+        * pointing at the allocation for free().
+        */
+       char *pos = str;
+       int ret = 0;
+
+       if (!str)
+               die("strdup");
+
+       while ((tok = strsep(&pos, ",")) != NULL) {
+               if (sort_dimension__add(tok, sort_list) < 0) {
+                       /* 'tok' points into 'str': report before freeing. */
+                       error("Unknown --sort key: '%s'", tok);
+                       ret = -1;
+                       break;
+               }
+       }
+
+       free(str);
+       return ret;
+}
+
+/*
+ * --sort callback: install the key list @arg on one of the two sort lists.
+ *
+ * The list is chosen by comparing caller_flag with alloc_flag: caller_sort
+ * when caller_flag is the larger, alloc_sort otherwise.  parse_stat_opt()
+ * raises the selected flag above the other, so the most recent --stat
+ * selector decides.  Returns setup_sorting()'s result, or -1 when no
+ * argument was given.
+ */
+static int parse_sort_opt(const struct option *opt __used,
+                         const char *arg, int unset __used)
+{
+       if (!arg)
+               return -1;
+
+       if (caller_flag > alloc_flag)
+               return setup_sorting(&caller_sort, arg);
+
+       return setup_sorting(&alloc_sort, arg);
+}
+
+/*
+ * --stat callback: accept "alloc" or "caller" and raise the matching flag to
+ * one more than the other flag's current value.
+ *
+ * Returns 0 on success, -1 when the argument is missing or is neither of
+ * the two selectors.
+ */
+static int parse_stat_opt(const struct option *opt __used,
+                         const char *arg, int unset __used)
+{
+       if (!arg)
+               return -1;
+
+       if (!strcmp(arg, "alloc")) {
+               alloc_flag = (caller_flag + 1);
+               return 0;
+       }
+
+       if (!strcmp(arg, "caller")) {
+               caller_flag = (alloc_flag + 1);
+               return 0;
+       }
+
+       return -1;
+}
+
+/*
+ * --line callback: parse @arg as a decimal number and store it in
+ * caller_lines when caller_flag is larger than alloc_flag, otherwise in
+ * alloc_lines.
+ *
+ * Returns 0 on success, -1 when the argument is missing, contains no
+ * digits, or has trailing characters after the number.
+ */
+static int parse_line_opt(const struct option *opt __used,
+                         const char *arg, int unset __used)
+{
+       char *end;
+       int lines;
+
+       if (!arg)
+               return -1;
+
+       lines = strtoul(arg, &end, 10);
+
+       /* Nothing consumed, or junk after the number: not a line count. */
+       if (end == arg || *end != '\0')
+               return -1;
+
+       if (caller_flag > alloc_flag)
+               caller_lines = lines;
+       else
+               alloc_lines = lines;
+
+       return 0;
+}
+
+/*
+ * Command-line options of the kmem command, consumed by parse_options() in
+ * cmd_kmem().  The sort keys listed in the --sort help text must match the
+ * .name fields of the entries in avail_sorts[].
+ */
+static const struct option kmem_options[] = {
+       OPT_STRING('i', "input", &input_name, "file",
+                  "input file name"),
+       OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
+                    "stat selector, Pass 'alloc' or 'caller'.",
+                    parse_stat_opt),
+       OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
+                    "sort by keys: ptr, callsite, bytes, hit, pingpong, frag",
+                    parse_sort_opt),
+       OPT_CALLBACK('l', "line", NULL, "num",
+                    "show n lines",
+                    parse_line_opt),
+       OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
+       OPT_END()
+};
+
+/*
+ * Fixed leading arguments that __cmd_record() passes to cmd_record(); the
+ * user's own arguments are appended after these.  The -e entries name the
+ * kmem:* events to record.
+ */
+static const char *record_args[] = {
+       "record",
+       "-a",
+       "-R",
+       "-M",
+       "-f",
+       "-c", "1",
+       "-e", "kmem:kmalloc",
+       "-e", "kmem:kmalloc_node",
+       "-e", "kmem:kfree",
+       "-e", "kmem:kmem_cache_alloc",
+       "-e", "kmem:kmem_cache_alloc_node",
+       "-e", "kmem:kmem_cache_free",
+};
+
+/*
+ * Build an argument vector made of record_args[] followed by argv[1..] and
+ * hand it to cmd_record().
+ *
+ * @argv[0] (the "record" sub-command word) is dropped because record_args[]
+ * already supplies it.  The vector is allocated with one spare zeroed slot
+ * so it ends in NULL.  Returns whatever cmd_record() returns; die()s if
+ * memory cannot be allocated.
+ */
+static int __cmd_record(int argc, const char **argv)
+{
+       unsigned int rec_argc, i, j;
+       const char **rec_argv;
+
+       rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+       rec_argv = calloc(rec_argc + 1, sizeof(char *));
+       if (!rec_argv)
+               die("calloc");
+
+       for (i = 0; i < ARRAY_SIZE(record_args); i++) {
+               rec_argv[i] = strdup(record_args[i]);
+               if (!rec_argv[i])
+                       die("strdup");
+       }
+
+       for (j = 1; j < (unsigned int)argc; j++, i++)
+               rec_argv[i] = argv[j];
+
+       return cmd_record(i, rec_argv, NULL);
+}
+
+/*
+ * Entry point of the kmem command.
+ *
+ * After option parsing, a remaining first word starting with "rec" selects
+ * recording via __cmd_record(); any other leftover argument prints the usage
+ * text.  With no leftover arguments, each sort list that is still empty is
+ * filled from default_sort_order, and then the report is produced by
+ * __cmd_kmem().
+ */
+int cmd_kmem(int argc, const char **argv, const char *prefix __used)
+{
+       symbol__init(0);
+
+       argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
+
+       if (argc) {
+               if (!strncmp(argv[0], "rec", 3))
+                       return __cmd_record(argc, argv);
+
+               usage_with_options(kmem_usage, kmem_options);
+       }
+
+       if (list_empty(&caller_sort))
+               setup_sorting(&caller_sort, default_sort_order);
+
+       if (list_empty(&alloc_sort))
+               setup_sorting(&alloc_sort, default_sort_order);
+
+       setup_cpunode_map();
+
+       return __cmd_kmem();
+}
+
index d78a3d945492c0b8070479d91bea45a5dc8ec8cb..a58e11b7ea8000de71659a93f093d5d2dcddcb73 100644 (file)
@@ -40,6 +40,7 @@
 #include "util/parse-options.h"
 #include "util/parse-events.h" /* For debugfs_path */
 #include "util/probe-finder.h"
+#include "util/probe-event.h"
 
 /* Default vmlinux search paths */
 #define NR_SEARCH_PATH 3
@@ -51,8 +52,6 @@ const char *default_search_path[NR_SEARCH_PATH] = {
 
 #define MAX_PATH_LEN 256
 #define MAX_PROBES 128
-#define MAX_PROBE_ARGS 128
-#define PERFPROBE_GROUP "probe"
 
 /* Session management structure */
 static struct {
@@ -63,152 +62,19 @@ static struct {
        struct probe_point probes[MAX_PROBES];
 } session;
 
-#define semantic_error(msg ...) die("Semantic error :" msg)
-
-/* Parse probe point. Return 1 if return probe */
-static void parse_probe_point(char *arg, struct probe_point *pp)
-{
-       char *ptr, *tmp;
-       char c, nc = 0;
-       /*
-        * <Syntax>
-        * perf probe SRC:LN
-        * perf probe FUNC[+OFFS|%return][@SRC]
-        */
-
-       ptr = strpbrk(arg, ":+@%");
-       if (ptr) {
-               nc = *ptr;
-               *ptr++ = '\0';
-       }
-
-       /* Check arg is function or file and copy it */
-       if (strchr(arg, '.'))   /* File */
-               pp->file = strdup(arg);
-       else                    /* Function */
-               pp->function = strdup(arg);
-       DIE_IF(pp->file == NULL && pp->function == NULL);
-
-       /* Parse other options */
-       while (ptr) {
-               arg = ptr;
-               c = nc;
-               ptr = strpbrk(arg, ":+@%");
-               if (ptr) {
-                       nc = *ptr;
-                       *ptr++ = '\0';
-               }
-               switch (c) {
-               case ':':       /* Line number */
-                       pp->line = strtoul(arg, &tmp, 0);
-                       if (*tmp != '\0')
-                               semantic_error("There is non-digit charactor"
-                                               " in line number.");
-                       break;
-               case '+':       /* Byte offset from a symbol */
-                       pp->offset = strtoul(arg, &tmp, 0);
-                       if (*tmp != '\0')
-                               semantic_error("There is non-digit charactor"
-                                               " in offset.");
-                       break;
-               case '@':       /* File name */
-                       if (pp->file)
-                               semantic_error("SRC@SRC is not allowed.");
-                       pp->file = strdup(arg);
-                       DIE_IF(pp->file == NULL);
-                       if (ptr)
-                               semantic_error("@SRC must be the last "
-                                              "option.");
-                       break;
-               case '%':       /* Probe places */
-                       if (strcmp(arg, "return") == 0) {
-                               pp->retprobe = 1;
-                       } else  /* Others not supported yet */
-                               semantic_error("%%%s is not supported.", arg);
-                       break;
-               default:
-                       DIE_IF("Program has a bug.");
-                       break;
-               }
-       }
-
-       /* Exclusion check */
-       if (pp->line && pp->offset)
-               semantic_error("Offset can't be used with line number.");
-       if (!pp->line && pp->file && !pp->function)
-               semantic_error("File always requires line number.");
-       if (pp->offset && !pp->function)
-               semantic_error("Offset requires an entry function.");
-       if (pp->retprobe && !pp->function)
-               semantic_error("Return probe requires an entry function.");
-       if ((pp->offset || pp->line) && pp->retprobe)
-               semantic_error("Offset/Line can't be used with return probe.");
-
-       pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n",
-                pp->function, pp->file, pp->line, pp->offset, pp->retprobe);
-}
+static bool listing;
 
 /* Parse an event definition. Note that any error must die. */
 static void parse_probe_event(const char *str)
 {
-       char *argv[MAX_PROBE_ARGS + 2]; /* Event + probe + args */
-       int argc, i;
        struct probe_point *pp = &session.probes[session.nr_probe];
 
        pr_debug("probe-definition(%d): %s\n", session.nr_probe, str);
        if (++session.nr_probe == MAX_PROBES)
-               semantic_error("Too many probes");
-
-       /* Separate arguments, similar to argv_split */
-       argc = 0;
-       do {
-               /* Skip separators */
-               while (isspace(*str))
-                       str++;
-
-               /* Add an argument */
-               if (*str != '\0') {
-                       const char *s = str;
-
-                       /* Skip the argument */
-                       while (!isspace(*str) && *str != '\0')
-                               str++;
-
-                       /* Duplicate the argument */
-                       argv[argc] = strndup(s, str - s);
-                       if (argv[argc] == NULL)
-                               die("strndup");
-                       if (++argc == MAX_PROBE_ARGS)
-                               semantic_error("Too many arguments");
-                       pr_debug("argv[%d]=%s\n", argc, argv[argc - 1]);
-               }
-       } while (*str != '\0');
-       if (!argc)
-               semantic_error("An empty argument.");
-
-       /* Parse probe point */
-       parse_probe_point(argv[0], pp);
-       free(argv[0]);
-       if (pp->file || pp->line)
-               session.need_dwarf = 1;
-
-       /* Copy arguments */
-       pp->nr_args = argc - 1;
-       if (pp->nr_args > 0) {
-               pp->args = (char **)malloc(sizeof(char *) * pp->nr_args);
-               if (!pp->args)
-                       die("malloc");
-               memcpy(pp->args, &argv[1], sizeof(char *) * pp->nr_args);
-       }
+               die("Too many probes (> %d) are specified.", MAX_PROBES);
 
-       /* Ensure return probe has no C argument */
-       for (i = 0; i < pp->nr_args; i++)
-               if (is_c_varname(pp->args[i])) {
-                       if (pp->retprobe)
-                               semantic_error("You can't specify local"
-                                               " variable for kretprobe");
-                       session.need_dwarf = 1;
-               }
+       /* Parse perf-probe event into probe_point */
+       session.need_dwarf = parse_perf_probe_event(str, pp);
 
        pr_debug("%d arguments\n", pp->nr_args);
 }
@@ -255,6 +121,7 @@ static int open_default_vmlinux(void)
 static const char * const probe_usage[] = {
        "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
        "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
+       "perf probe --list",
        NULL
 };
 
@@ -265,6 +132,7 @@ static const struct option options[] = {
        OPT_STRING('k', "vmlinux", &session.vmlinux, "file",
                "vmlinux/module pathname"),
 #endif
+       OPT_BOOLEAN('l', "list", &listing, "list up current probes"),
        OPT_CALLBACK('a', "add", NULL,
 #ifdef NO_LIBDWARF
                "FUNC[+OFFS|%return] [ARG ...]",
@@ -285,73 +153,38 @@ static const struct option options[] = {
                "\t\tALN:\tAbsolute line number in file.\n"
                "\t\tARG:\tProbe argument (local variable name or\n"
 #endif
-               "\t\t\tkprobe-tracer argument format is supported.)\n",
+               "\t\t\tkprobe-tracer argument format.)\n",
                opt_add_probe_event),
        OPT_END()
 };
 
-static int write_new_event(int fd, const char *buf)
-{
-       int ret;
-
-       ret = write(fd, buf, strlen(buf));
-       if (ret <= 0)
-               die("Failed to create event.");
-       else
-               printf("Added new event: %s\n", buf);
-
-       return ret;
-}
-
-#define MAX_CMDLEN 256
-
-static int synthesize_probe_event(struct probe_point *pp)
-{
-       char *buf;
-       int i, len, ret;
-       pp->probes[0] = buf = (char *)calloc(MAX_CMDLEN, sizeof(char));
-       if (!buf)
-               die("Failed to allocate memory by calloc.");
-       ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset);
-       if (ret <= 0 || ret >= MAX_CMDLEN)
-               goto error;
-       len = ret;
-
-       for (i = 0; i < pp->nr_args; i++) {
-               ret = snprintf(&buf[len], MAX_CMDLEN - len, " %s",
-                              pp->args[i]);
-               if (ret <= 0 || ret >= MAX_CMDLEN - len)
-                       goto error;
-               len += ret;
-       }
-       pp->found = 1;
-       return pp->found;
-error:
-       free(pp->probes[0]);
-       if (ret > 0)
-               ret = -E2BIG;
-       return ret;
-}
-
 int cmd_probe(int argc, const char **argv, const char *prefix __used)
 {
-       int i, j, fd, ret;
+       int i, j, ret;
+#ifndef NO_LIBDWARF
+       int fd;
+#endif
        struct probe_point *pp;
-       char buf[MAX_CMDLEN];
 
        argc = parse_options(argc, argv, options, probe_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
        for (i = 0; i < argc; i++)
                parse_probe_event(argv[i]);
 
-       if (session.nr_probe == 0)
+       if ((session.nr_probe == 0 && !listing) ||
+           (session.nr_probe != 0 && listing))
                usage_with_options(probe_usage, options);
 
+       if (listing) {
+               show_perf_probe_events();
+               return 0;
+       }
+
        if (session.need_dwarf)
 #ifdef NO_LIBDWARF
-               semantic_error("Debuginfo-analysis is not supported");
+               die("Debuginfo-analysis is not supported");
 #else  /* !NO_LIBDWARF */
-               pr_info("Some probes require debuginfo.\n");
+               pr_debug("Some probes require debuginfo.\n");
 
        if (session.vmlinux)
                fd = open(session.vmlinux, O_RDONLY);
@@ -395,41 +228,15 @@ end_dwarf:
                if (pp->found)  /* This probe is already found. */
                        continue;
 
-               ret = synthesize_probe_event(pp);
+               ret = synthesize_trace_kprobe_event(pp);
                if (ret == -E2BIG)
-                       semantic_error("probe point is too long.");
+                       die("probe point definition becomes too long.");
                else if (ret < 0)
                        die("Failed to synthesize a probe point.");
        }
 
        /* Settng up probe points */
-       snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path);
-       fd = open(buf, O_WRONLY, O_APPEND);
-       if (fd < 0) {
-               if (errno == ENOENT)
-                       die("kprobe_events file does not exist - please rebuild with CONFIG_KPROBE_TRACER.");
-               else
-                       die("Could not open kprobe_events file: %s",
-                           strerror(errno));
-       }
-       for (j = 0; j < session.nr_probe; j++) {
-               pp = &session.probes[j];
-               if (pp->found == 1) {
-                       snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x %s\n",
-                               pp->retprobe ? 'r' : 'p', PERFPROBE_GROUP,
-                               pp->function, pp->offset, pp->probes[0]);
-                       write_new_event(fd, buf);
-               } else
-                       for (i = 0; i < pp->found; i++) {
-                               snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x_%d %s\n",
-                                       pp->retprobe ? 'r' : 'p',
-                                       PERFPROBE_GROUP,
-                                       pp->function, pp->offset, i,
-                                       pp->probes[0]);
-                               write_new_event(fd, buf);
-                       }
-       }
-       close(fd);
+       add_trace_kprobe_events(session.probes, session.nr_probe);
        return 0;
 }
 
index 82260c56db3d5f94037868a7c5587452f44f910f..0e519c667e3ac47f8fe9576575f62dc7b1991d83 100644 (file)
@@ -307,6 +307,12 @@ try_again:
                printf("\n");
                error("perfcounter syscall returned with %d (%s)\n",
                        fd[nr_cpu][counter], strerror(err));
+
+#if defined(__i386__) || defined(__x86_64__)
+               if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
+                       die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n");
+#endif
+
                die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                exit(-1);
        }
@@ -400,7 +406,7 @@ static int __cmd_record(int argc, const char **argv)
        struct stat st;
        pid_t pid = 0;
        int flags;
-       int ret;
+       int err;
        unsigned long waking = 0;
 
        page_size = sysconf(_SC_PAGE_SIZE);
@@ -434,16 +440,18 @@ static int __cmd_record(int argc, const char **argv)
                exit(-1);
        }
 
-       if (!file_new)
-               header = perf_header__read(output);
-       else
-               header = perf_header__new();
-
+       header = perf_header__new();
        if (header == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }
 
+       if (!file_new) {
+               err = perf_header__read(header, output);
+               if (err < 0)
+                       return err;
+       }
+
        if (raw_samples) {
                perf_header__set_feat(header, HEADER_TRACE_INFO);
        } else {
@@ -472,8 +480,11 @@ static int __cmd_record(int argc, const char **argv)
                }
        }
 
-       if (file_new)
-               perf_header__write(header, output, false);
+       if (file_new) {
+               err = perf_header__write(header, output, false);
+               if (err < 0)
+                       return err;
+       }
 
        if (!system_wide)
                event__synthesize_thread(pid, process_synthesized_event);
@@ -527,7 +538,7 @@ static int __cmd_record(int argc, const char **argv)
                if (hits == samples) {
                        if (done)
                                break;
-                       ret = poll(event_array, nr_poll, -1);
+                       err = poll(event_array, nr_poll, -1);
                        waking++;
                }
 
index 1a806d5f05cfbd76ab83630304027e0cedeb9ff9..383c4ab4f9af06e6cfaa0e6f6e358aa63e1d1d68 100644 (file)
@@ -52,13 +52,12 @@ static int          exclude_other = 1;
 
 static char            callchain_default_opt[] = "fractal,0.5";
 
-static char            *cwd;
-static int             cwdlen;
-
 static struct perf_header *header;
 
 static u64             sample_type;
 
+struct symbol_conf     symbol_conf;
+
 
 static size_t
 callchain__fprintf_left_margin(FILE *fp, int left_margin)
@@ -409,55 +408,6 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm)
        return 0;
 }
 
-
-static struct symbol *
-resolve_symbol(struct thread *thread, struct map **mapp, u64 *ipp)
-{
-       struct map *map = mapp ? *mapp : NULL;
-       u64 ip = *ipp;
-
-       if (map)
-               goto got_map;
-
-       if (!thread)
-               return NULL;
-
-       map = thread__find_map(thread, ip);
-       if (map != NULL) {
-               /*
-                * We have to do this here as we may have a dso
-                * with no symbol hit that has a name longer than
-                * the ones with symbols sampled.
-                */
-               if (!sort_dso.elide && !map->dso->slen_calculated)
-                       dso__calc_col_width(map->dso);
-
-               if (mapp)
-                       *mapp = map;
-got_map:
-               ip = map->map_ip(map, ip);
-       } else {
-               /*
-                * If this is outside of all known maps,
-                * and is a negative address, try to look it
-                * up in the kernel dso, as it might be a
-                * vsyscall or vdso (which executes in user-mode).
-                *
-                * XXX This is nasty, we should have a symbol list in
-                * the "[vdso]" dso, but for now lets use the old
-                * trick of looking in the whole kernel symbol list.
-                */
-               if ((long long)ip < 0)
-                       return kernel_maps__find_symbol(ip, mapp);
-       }
-       dump_printf(" ...... dso: %s\n",
-                   map ? map->dso->long_name : "<not found>");
-       dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip);
-       *ipp  = ip;
-
-       return map ? map__find_symbol(map, ip, NULL) : NULL;
-}
-
 static int call__match(struct symbol *sym)
 {
        if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
@@ -466,11 +416,11 @@ static int call__match(struct symbol *sym)
        return 0;
 }
 
-static struct symbol **resolve_callchain(struct thread *thread, struct map *map,
+static struct symbol **resolve_callchain(struct thread *thread,
                                         struct ip_callchain *chain,
                                         struct symbol **parent)
 {
-       u64 context = PERF_CONTEXT_MAX;
+       u8 cpumode = PERF_RECORD_MISC_USER;
        struct symbol **syms = NULL;
        unsigned int i;
 
@@ -484,30 +434,31 @@ static struct symbol **resolve_callchain(struct thread *thread, struct map *map,
 
        for (i = 0; i < chain->nr; i++) {
                u64 ip = chain->ips[i];
-               struct symbol *sym = NULL;
+               struct addr_location al;
 
                if (ip >= PERF_CONTEXT_MAX) {
-                       context = ip;
+                       switch (ip) {
+                       case PERF_CONTEXT_HV:
+                               cpumode = PERF_RECORD_MISC_HYPERVISOR;  break;
+                       case PERF_CONTEXT_KERNEL:
+                               cpumode = PERF_RECORD_MISC_KERNEL;      break;
+                       case PERF_CONTEXT_USER:
+                               cpumode = PERF_RECORD_MISC_USER;        break;
+                       default:
+                               break;
+                       }
                        continue;
                }
 
-               switch (context) {
-               case PERF_CONTEXT_HV:
-                       break;
-               case PERF_CONTEXT_KERNEL:
-                       sym = kernel_maps__find_symbol(ip, &map);
-                       break;
-               default:
-                       sym = resolve_symbol(thread, &map, &ip);
-                       break;
-               }
-
-               if (sym) {
-                       if (sort__has_parent && !*parent && call__match(sym))
-                               *parent = sym;
+               thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+                                          ip, &al, NULL);
+               if (al.sym != NULL) {
+                       if (sort__has_parent && !*parent &&
+                           call__match(al.sym))
+                               *parent = al.sym;
                        if (!callchain)
                                break;
-                       syms[i] = sym;
+                       syms[i] = al.sym;
                }
        }
 
@@ -518,20 +469,17 @@ static struct symbol **resolve_callchain(struct thread *thread, struct map *map,
  * collect histogram counts
  */
 
-static int
-hist_entry__add(struct thread *thread, struct map *map,
-               struct symbol *sym, u64 ip, struct ip_callchain *chain,
-               char level, u64 count)
+static int hist_entry__add(struct addr_location *al,
+                          struct ip_callchain *chain, u64 count)
 {
        struct symbol **syms = NULL, *parent = NULL;
        bool hit;
        struct hist_entry *he;
 
        if ((sort__has_parent || callchain) && chain)
-               syms = resolve_callchain(thread, map, chain, &parent);
+               syms = resolve_callchain(al->thread, chain, &parent);
 
-       he = __hist_entry__add(thread, map, sym, parent,
-                              ip, count, level, &hit);
+       he = __hist_entry__add(al, parent, count, &hit);
        if (he == NULL)
                return -ENOMEM;
 
@@ -655,17 +603,14 @@ static int validate_chain(struct ip_callchain *chain, event_t *event)
        return 0;
 }
 
-static int
-process_sample_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_sample_event(event_t *event)
 {
-       char level;
-       struct symbol *sym = NULL;
        u64 ip = event->ip.ip;
        u64 period = 1;
-       struct map *map = NULL;
        void *more_data = event->ip.__more_data;
        struct ip_callchain *chain = NULL;
        int cpumode;
+       struct addr_location al;
        struct thread *thread = threads__findnew(event->ip.pid);
 
        if (sample_type & PERF_SAMPLE_PERIOD) {
@@ -673,9 +618,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                more_data += sizeof(u64);
        }
 
-       dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
+       dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
                event->header.misc,
                event->ip.pid, event->ip.tid,
                (void *)(long)ip,
@@ -713,136 +656,51 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 
        cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-       if (cpumode == PERF_RECORD_MISC_KERNEL) {
-               level = 'k';
-               sym = kernel_maps__find_symbol(ip, &map);
-               dump_printf(" ...... dso: %s\n",
-                           map ? map->dso->long_name : "<not found>");
-       } else if (cpumode == PERF_RECORD_MISC_USER) {
-               level = '.';
-               sym = resolve_symbol(thread, &map, &ip);
-
-       } else {
-               level = 'H';
-               dump_printf(" ...... dso: [hypervisor]\n");
-       }
+       thread__find_addr_location(thread, cpumode,
+                                  MAP__FUNCTION, ip, &al, NULL);
+       /*
+        * We have to do this here as we may have a dso with no symbol hit that
+        * has a name longer than the ones with symbols sampled.
+        */
+       if (al.map && !sort_dso.elide && !al.map->dso->slen_calculated)
+               dso__calc_col_width(al.map->dso);
 
        if (dso_list &&
-           (!map || !map->dso ||
-            !(strlist__has_entry(dso_list, map->dso->short_name) ||
-              (map->dso->short_name != map->dso->long_name &&
-               strlist__has_entry(dso_list, map->dso->long_name)))))
+           (!al.map || !al.map->dso ||
+            !(strlist__has_entry(dso_list, al.map->dso->short_name) ||
+              (al.map->dso->short_name != al.map->dso->long_name &&
+               strlist__has_entry(dso_list, al.map->dso->long_name)))))
                return 0;
 
-       if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
+       if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name))
                return 0;
 
-       if (hist_entry__add(thread, map, sym, ip,
-                           chain, level, period)) {
+       if (hist_entry__add(&al, chain, period)) {
                pr_debug("problem incrementing symbol count, skipping event\n");
                return -1;
        }
 
-       total += period;
+       event__stats.total += period;
 
        return 0;
 }
 
-static int
-process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       struct map *map = map__new(&event->mmap, cwd, cwdlen);
-       struct thread *thread = threads__findnew(event->mmap.pid);
-
-       dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->mmap.pid,
-               event->mmap.tid,
-               (void *)(long)event->mmap.start,
-               (void *)(long)event->mmap.len,
-               (void *)(long)event->mmap.pgoff,
-               event->mmap.filename);
-
-       if (thread == NULL || map == NULL) {
-               dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
-               return 0;
-       }
-
-       thread__insert_map(thread, map);
-       total_mmap++;
-
-       return 0;
-}
-
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_comm_event(event_t *event)
 {
        struct thread *thread = threads__findnew(event->comm.pid);
 
-       dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->comm.comm, event->comm.pid);
+       dump_printf(": %s:%d\n", event->comm.comm, event->comm.pid);
 
        if (thread == NULL ||
            thread__set_comm_adjust(thread, event->comm.comm)) {
                dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
                return -1;
        }
-       total_comm++;
-
-       return 0;
-}
-
-static int
-process_task_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       struct thread *thread = threads__findnew(event->fork.pid);
-       struct thread *parent = threads__findnew(event->fork.ppid);
-
-       dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->header.type == PERF_RECORD_FORK ? "FORK" : "EXIT",
-               event->fork.pid, event->fork.tid,
-               event->fork.ppid, event->fork.ptid);
-
-       /*
-        * A thread clone will have the same PID for both
-        * parent and child.
-        */
-       if (thread == parent)
-               return 0;
-
-       if (event->header.type == PERF_RECORD_EXIT)
-               return 0;
-
-       if (!thread || !parent || thread__fork(thread, parent)) {
-               dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
-               return -1;
-       }
-       total_fork++;
-
-       return 0;
-}
-
-static int
-process_lost_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->lost.id,
-               event->lost.lost);
-
-       total_lost += event->lost.lost;
 
        return 0;
 }
 
-static int
-process_read_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_read_event(event_t *event)
 {
        struct perf_event_attr *attr;
 
@@ -858,14 +716,9 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head)
                                           event->read.value);
        }
 
-       dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n",
-                       (void *)(offset + head),
-                       (void *)(long)(event->header.size),
-                       event->read.pid,
-                       event->read.tid,
-                       attr ? __event_name(attr->type, attr->config)
-                            : "FAIL",
-                       event->read.value);
+       dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid,
+                   attr ? __event_name(attr->type, attr->config) : "FAIL",
+                   event->read.value);
 
        return 0;
 }
@@ -901,11 +754,11 @@ static int sample_type_check(u64 type)
 
 static struct perf_file_handler file_handler = {
        .process_sample_event   = process_sample_event,
-       .process_mmap_event     = process_mmap_event,
+       .process_mmap_event     = event__process_mmap,
        .process_comm_event     = process_comm_event,
-       .process_exit_event     = process_task_event,
-       .process_fork_event     = process_task_event,
-       .process_lost_event     = process_lost_event,
+       .process_exit_event     = event__process_task,
+       .process_fork_event     = event__process_task,
+       .process_lost_event     = event__process_lost,
        .process_read_event     = process_read_event,
        .sample_type_check      = sample_type_check,
 };
@@ -924,20 +777,15 @@ static int __cmd_report(void)
 
        register_perf_file_handler(&file_handler);
 
-       ret = mmap_dispatch_perf_file(&header, input_name, force, full_paths,
-                                     &cwdlen, &cwd);
+       ret = mmap_dispatch_perf_file(&header, input_name, force,
+                                     full_paths, &event__cwdlen, &event__cwd);
        if (ret)
                return ret;
 
-       dump_printf("      IP events: %10ld\n", total);
-       dump_printf("    mmap events: %10ld\n", total_mmap);
-       dump_printf("    comm events: %10ld\n", total_comm);
-       dump_printf("    fork events: %10ld\n", total_fork);
-       dump_printf("    lost events: %10ld\n", total_lost);
-       dump_printf(" unknown events: %10ld\n", file_handler.total_unknown);
-
-       if (dump_trace)
+       if (dump_trace) {
+               event__print_totals();
                return 0;
+       }
 
        if (verbose > 3)
                threads__fprintf(stdout);
@@ -946,8 +794,8 @@ static int __cmd_report(void)
                dsos__fprintf(stdout);
 
        collapse__resort();
-       output__resort(total);
-       output__fprintf(stdout, total);
+       output__resort(event__stats.total);
+       output__fprintf(stdout, event__stats.total);
 
        if (show_threads)
                perf_read_values_destroy(&show_threads_values);
@@ -1021,9 +869,10 @@ static const struct option options[] = {
                    "be more verbose (show symbol address, etc)"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
-       OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
+       OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+                  "file", "vmlinux pathname"),
        OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
-       OPT_BOOLEAN('m', "modules", &modules,
+       OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
                    "load module symbols - WARNING: use only with -k and LIVE kernel"),
        OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
                    "Show a column with the number of samples"),
@@ -1093,7 +942,8 @@ static void setup_list(struct strlist **list, const char *list_str,
 
 int cmd_report(int argc, const char **argv, const char *prefix __used)
 {
-       symbol__init(0);
+       if (symbol__init(&symbol_conf) < 0)
+               return -1;
 
        argc = parse_options(argc, argv, options, report_usage, 0);
 
index df44b756cecc1048abe16145d1a001783c5edfcd..26b782f26ee1a97c35f8ce8445b07aa470a35806 100644 (file)
@@ -22,8 +22,6 @@
 
 static char                    const *input_name = "perf.data";
 
-static unsigned long           total_comm = 0;
-
 static struct perf_header      *header;
 static u64                     sample_type;
 
@@ -32,9 +30,6 @@ static char                   *sort_order = default_sort_order;
 
 static int                     profile_cpu = -1;
 
-static char                    *cwd;
-static int                     cwdlen;
-
 #define PR_SET_NAME            15               /* Set process name */
 #define MAX_CPUS               4096
 
@@ -225,7 +220,7 @@ static void calibrate_sleep_measurement_overhead(void)
 static struct sched_atom *
 get_new_event(struct task_desc *task, u64 timestamp)
 {
-       struct sched_atom *event = calloc(1, sizeof(*event));
+       struct sched_atom *event = zalloc(sizeof(*event));
        unsigned long idx = task->nr_events;
        size_t size;
 
@@ -293,7 +288,7 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
                return;
        }
 
-       wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem));
+       wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
        sem_init(wakee_event->wait_sem, 0, 0);
        wakee_event->specific_wait = 1;
        event->wait_sem = wakee_event->wait_sem;
@@ -323,7 +318,7 @@ static struct task_desc *register_pid(unsigned long pid, const char *comm)
        if (task)
                return task;
 
-       task = calloc(1, sizeof(*task));
+       task = zalloc(sizeof(*task));
        task->pid = pid;
        task->nr = nr_tasks;
        strcpy(task->comm, comm);
@@ -633,27 +628,6 @@ static void test_calibrations(void)
        printf("the sleep test took %Ld nsecs\n", T1-T0);
 }
 
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       struct thread *thread = threads__findnew(event->comm.tid);
-
-       dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->comm.comm, event->comm.pid);
-
-       if (thread == NULL ||
-           thread__set_comm(thread, event->comm.comm)) {
-               dump_printf("problem processing perf_event_comm, skipping event.\n");
-               return -1;
-       }
-       total_comm++;
-
-       return 0;
-}
-
-
 struct raw_event_sample {
        u32 size;
        char data[0];
@@ -962,9 +936,7 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data,
 
 static void thread_atoms_insert(struct thread *thread)
 {
-       struct work_atoms *atoms;
-
-       atoms = calloc(sizeof(*atoms), 1);
+       struct work_atoms *atoms = zalloc(sizeof(*atoms));
        if (!atoms)
                die("No memory");
 
@@ -996,9 +968,7 @@ add_sched_out_event(struct work_atoms *atoms,
                    char run_state,
                    u64 timestamp)
 {
-       struct work_atom *atom;
-
-       atom = calloc(sizeof(*atom), 1);
+       struct work_atom *atom = zalloc(sizeof(*atom));
        if (!atom)
                die("Non memory");
 
@@ -1626,8 +1596,7 @@ process_raw_event(event_t *raw_event __used, void *more_data,
                process_sched_migrate_task_event(raw, event, cpu, timestamp, thread);
 }
 
-static int
-process_sample_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_sample_event(event_t *event)
 {
        struct thread *thread;
        u64 ip = event->ip.ip;
@@ -1657,9 +1626,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                more_data += sizeof(u64);
        }
 
-       dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
+       dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
                event->header.misc,
                event->ip.pid, event->ip.tid,
                (void *)(long)ip,
@@ -1681,10 +1648,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
        return 0;
 }
 
-static int
-process_lost_event(event_t *event __used,
-                  unsigned long offset __used,
-                  unsigned long head __used)
+static int process_lost_event(event_t *event __used)
 {
        nr_lost_chunks++;
        nr_lost_events += event->lost.lost;
@@ -1708,7 +1672,7 @@ static int sample_type_check(u64 type)
 
 static struct perf_file_handler file_handler = {
        .process_sample_event   = process_sample_event,
-       .process_comm_event     = process_comm_event,
+       .process_comm_event     = event__process_comm,
        .process_lost_event     = process_lost_event,
        .sample_type_check      = sample_type_check,
 };
@@ -1718,7 +1682,8 @@ static int read_events(void)
        register_idle_thread();
        register_perf_file_handler(&file_handler);
 
-       return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
+       return mmap_dispatch_perf_file(&header, input_name, 0, 0,
+                                      &event__cwdlen, &event__cwd);
 }
 
 static void print_bad_events(void)
index 665877e4a944769166548ba282dd8ad7795d7fee..cb58b6605fcc875fec3d0122e8607b17a408db3c 100644 (file)
 #include "util/header.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/event.h"
+#include "util/data_map.h"
 #include "util/svghelper.h"
 
 static char            const *input_name = "perf.data";
 static char            const *output_name = "output.svg";
 
 
-static unsigned long   page_size;
-static unsigned long   mmap_window = 32;
 static u64             sample_type;
 
 static unsigned int    numcpus;
@@ -49,8 +49,6 @@ static u64            first_time, last_time;
 static int             power_only;
 
 
-static struct perf_header      *header;
-
 struct per_pid;
 struct per_pidcomm;
 
@@ -156,9 +154,9 @@ struct sample_wrapper *all_samples;
 
 struct process_filter;
 struct process_filter {
-       char                    *name;
-       int                     pid;
-       struct process_filter   *next;
+       char                    *name;
+       int                     pid;
+       struct process_filter   *next;
 };
 
 static struct process_filter *process_filter;
@@ -1045,36 +1043,6 @@ static void write_svg_file(const char *filename)
        svg_close();
 }
 
-static int
-process_event(event_t *event)
-{
-
-       switch (event->header.type) {
-
-       case PERF_RECORD_COMM:
-               return process_comm_event(event);
-       case PERF_RECORD_FORK:
-               return process_fork_event(event);
-       case PERF_RECORD_EXIT:
-               return process_exit_event(event);
-       case PERF_RECORD_SAMPLE:
-               return queue_sample_event(event);
-
-       /*
-        * We dont process them right now but they are fine:
-        */
-       case PERF_RECORD_MMAP:
-       case PERF_RECORD_THROTTLE:
-       case PERF_RECORD_UNTHROTTLE:
-               return 0;
-
-       default:
-               return -1;
-       }
-
-       return 0;
-}
-
 static void process_samples(void)
 {
        struct sample_wrapper *cursor;
@@ -1090,105 +1058,38 @@ static void process_samples(void)
        }
 }
 
-
-static int __cmd_timechart(void)
+static int sample_type_check(u64 type)
 {
-       int ret, rc = EXIT_FAILURE;
-       unsigned long offset = 0;
-       unsigned long head, shift;
-       struct stat statbuf;
-       event_t *event;
-       uint32_t size;
-       char *buf;
-       int input;
-
-       input = open(input_name, O_RDONLY);
-       if (input < 0) {
-               fprintf(stderr, " failed to open file: %s", input_name);
-               if (!strcmp(input_name, "perf.data"))
-                       fprintf(stderr, "  (try 'perf record' first)");
-               fprintf(stderr, "\n");
-               exit(-1);
-       }
+       sample_type = type;
 
-       ret = fstat(input, &statbuf);
-       if (ret < 0) {
-               perror("failed to stat file");
-               exit(-1);
-       }
-
-       if (!statbuf.st_size) {
-               fprintf(stderr, "zero-sized file, nothing to do!\n");
-               exit(0);
-       }
-
-       header = perf_header__read(input);
-       head = header->data_offset;
-
-       sample_type = perf_header__sample_type(header);
-
-       shift = page_size * (head / page_size);
-       offset += shift;
-       head -= shift;
-
-remap:
-       buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-                          MAP_SHARED, input, offset);
-       if (buf == MAP_FAILED) {
-               perror("failed to mmap file");
-               exit(-1);
-       }
-
-more:
-       event = (event_t *)(buf + head);
-
-       size = event->header.size;
-       if (!size)
-               size = 8;
-
-       if (head + event->header.size >= page_size * mmap_window) {
-               int ret2;
-
-               shift = page_size * (head / page_size);
-
-               ret2 = munmap(buf, page_size * mmap_window);
-               assert(ret2 == 0);
-
-               offset += shift;
-               head -= shift;
-               goto remap;
-       }
-
-       size = event->header.size;
-
-       if (!size || process_event(event) < 0) {
-               pr_warning("%p [%p]: skipping unknown header type: %d\n",
-                          (void *)(offset + head),
-                          (void *)(long)(event->header.size),
-                          event->header.type);
-               /*
-                * assume we lost track of the stream, check alignment, and
-                * increment a single u64 in the hope to catch on again 'soon'.
-                */
-
-               if (unlikely(head & 7))
-                       head &= ~7ULL;
-
-               size = 8;
+       if (!(sample_type & PERF_SAMPLE_RAW)) {
+               fprintf(stderr, "No trace samples found in the file.\n"
+                               "Have you used 'perf timechart record' to record it?\n");
+               return -1;
        }
 
-       head += size;
+       return 0;
+}
 
-       if (offset + head >= header->data_offset + header->data_size)
-               goto done;
+static struct perf_file_handler file_handler = {
+       .process_comm_event     = process_comm_event,
+       .process_fork_event     = process_fork_event,
+       .process_exit_event     = process_exit_event,
+       .process_sample_event   = queue_sample_event,
+       .sample_type_check      = sample_type_check,
+};
 
-       if (offset + head < (unsigned long)statbuf.st_size)
-               goto more;
+static int __cmd_timechart(void)
+{
+       struct perf_header *header;
+       int ret;
 
-done:
-       rc = EXIT_SUCCESS;
-       close(input);
+       register_perf_file_handler(&file_handler);
 
+       ret = mmap_dispatch_perf_file(&header, input_name, 0, 0,
+                                     &event__cwdlen, &event__cwd);
+       if (ret)
+               return EXIT_FAILURE;
 
        process_samples();
 
@@ -1201,7 +1102,7 @@ done:
        pr_info("Written %2.1f seconds of trace to %s.\n",
                (last_time - first_time) / 1000000000.0, output_name);
 
-       return rc;
+       return EXIT_SUCCESS;
 }
 
 static const char * const timechart_usage[] = {
@@ -1268,8 +1169,6 @@ int cmd_timechart(int argc, const char **argv, const char *prefix __used)
 {
        symbol__init(0);
 
-       page_size = getpagesize();
-
        argc = parse_options(argc, argv, options, timechart_usage,
                        PARSE_OPT_STOP_AT_NON_OPTION);
 
index 89b7f68a1799ee8f45c5e601d9ae8fd1f33e6763..e0a374d0e43a8da197177a7ccccbc3ada6349a2e 100644 (file)
@@ -78,6 +78,8 @@ static int                    dump_symtab                     =      0;
 
 static bool                    hide_kernel_symbols             =  false;
 static bool                    hide_user_symbols               =  false;
+static struct winsize          winsize;
+struct symbol_conf             symbol_conf;
 
 /*
  * Source
@@ -100,58 +102,75 @@ static int                        display_weighted                =     -1;
  * Symbols
  */
 
+struct sym_entry_source {
+       struct source_line      *source;
+       struct source_line      *lines;
+       struct source_line      **lines_tail;
+       pthread_mutex_t         lock;
+};
+
 struct sym_entry {
        struct rb_node          rb_node;
        struct list_head        node;
-       unsigned long           count[MAX_COUNTERS];
        unsigned long           snap_count;
        double                  weight;
        int                     skip;
+       u16                     name_len;
        u8                      origin;
        struct map              *map;
-       struct source_line      *source;
-       struct source_line      *lines;
-       struct source_line      **lines_tail;
-       pthread_mutex_t         source_lock;
+       struct sym_entry_source *src;
+       unsigned long           count[0];
 };
 
 /*
  * Source functions
  */
 
-/* most GUI terminals set LINES (although some don't export it) */
-static int term_rows(void)
+static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
 {
-       char *lines_string = getenv("LINES");
-       int n_lines;
+       return ((void *)self) + symbol_conf.priv_size;
+}
 
-       if (lines_string && (n_lines = atoi(lines_string)) > 0)
-               return n_lines;
-#ifdef TIOCGWINSZ
-       else {
-               struct winsize ws;
-               if (!ioctl(1, TIOCGWINSZ, &ws) && ws.ws_row)
-                       return ws.ws_row;
+static void get_term_dimensions(struct winsize *ws)
+{
+       char *s = getenv("LINES");
+
+       if (s != NULL) {
+               ws->ws_row = atoi(s);
+               s = getenv("COLUMNS");
+               if (s != NULL) {
+                       ws->ws_col = atoi(s);
+                       if (ws->ws_row && ws->ws_col)
+                               return;
+               }
        }
+#ifdef TIOCGWINSZ
+       if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+           ws->ws_row && ws->ws_col)
+               return;
 #endif
-       return 25;
+       ws->ws_row = 25;
+       ws->ws_col = 80;
 }
 
-static void update_print_entries(void)
+static void update_print_entries(struct winsize *ws)
 {
-       print_entries = term_rows();
+       print_entries = ws->ws_row;
+
        if (print_entries > 9)
                print_entries -= 9;
 }
 
 static void sig_winch_handler(int sig __used)
 {
-       update_print_entries();
+       get_term_dimensions(&winsize);
+       update_print_entries(&winsize);
 }
 
 static void parse_source(struct sym_entry *syme)
 {
        struct symbol *sym;
+       struct sym_entry_source *source;
        struct map *map;
        FILE *file;
        char command[PATH_MAX*2];
@@ -161,12 +180,21 @@ static void parse_source(struct sym_entry *syme)
        if (!syme)
                return;
 
-       if (syme->lines) {
-               pthread_mutex_lock(&syme->source_lock);
+       if (syme->src == NULL) {
+               syme->src = zalloc(sizeof(*source));
+               if (syme->src == NULL)
+                       return;
+               pthread_mutex_init(&syme->src->lock, NULL);
+       }
+
+       source = syme->src;
+
+       if (source->lines) {
+               pthread_mutex_lock(&source->lock);
                goto out_assign;
        }
 
-       sym = (struct symbol *)(syme + 1);
+       sym = sym_entry__symbol(syme);
        map = syme->map;
        path = map->dso->long_name;
 
@@ -182,8 +210,8 @@ static void parse_source(struct sym_entry *syme)
        if (!file)
                return;
 
-       pthread_mutex_lock(&syme->source_lock);
-       syme->lines_tail = &syme->lines;
+       pthread_mutex_lock(&source->lock);
+       source->lines_tail = &source->lines;
        while (!feof(file)) {
                struct source_line *src;
                size_t dummy = 0;
@@ -203,8 +231,8 @@ static void parse_source(struct sym_entry *syme)
                        *c = 0;
 
                src->next = NULL;
-               *syme->lines_tail = src;
-               syme->lines_tail = &src->next;
+               *source->lines_tail = src;
+               source->lines_tail = &src->next;
 
                if (strlen(src->line)>8 && src->line[8] == ':') {
                        src->eip = strtoull(src->line, NULL, 16);
@@ -218,7 +246,7 @@ static void parse_source(struct sym_entry *syme)
        pclose(file);
 out_assign:
        sym_filter_entry = syme;
-       pthread_mutex_unlock(&syme->source_lock);
+       pthread_mutex_unlock(&source->lock);
 }
 
 static void __zero_source_counters(struct sym_entry *syme)
@@ -226,7 +254,7 @@ static void __zero_source_counters(struct sym_entry *syme)
        int i;
        struct source_line *line;
 
-       line = syme->lines;
+       line = syme->src->lines;
        while (line) {
                for (i = 0; i < nr_counters; i++)
                        line->count[i] = 0;
@@ -241,13 +269,13 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
        if (syme != sym_filter_entry)
                return;
 
-       if (pthread_mutex_trylock(&syme->source_lock))
+       if (pthread_mutex_trylock(&syme->src->lock))
                return;
 
-       if (!syme->source)
+       if (syme->src == NULL || syme->src->source == NULL)
                goto out_unlock;
 
-       for (line = syme->lines; line; line = line->next) {
+       for (line = syme->src->lines; line; line = line->next) {
                if (line->eip == ip) {
                        line->count[counter]++;
                        break;
@@ -256,25 +284,25 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
                        break;
        }
 out_unlock:
-       pthread_mutex_unlock(&syme->source_lock);
+       pthread_mutex_unlock(&syme->src->lock);
 }
 
 static void lookup_sym_source(struct sym_entry *syme)
 {
-       struct symbol *symbol = (struct symbol *)(syme + 1);
+       struct symbol *symbol = sym_entry__symbol(syme);
        struct source_line *line;
        char pattern[PATH_MAX];
 
        sprintf(pattern, "<%s>:", symbol->name);
 
-       pthread_mutex_lock(&syme->source_lock);
-       for (line = syme->lines; line; line = line->next) {
+       pthread_mutex_lock(&syme->src->lock);
+       for (line = syme->src->lines; line; line = line->next) {
                if (strstr(line->line, pattern)) {
-                       syme->source = line;
+                       syme->src->source = line;
                        break;
                }
        }
-       pthread_mutex_unlock(&syme->source_lock);
+       pthread_mutex_unlock(&syme->src->lock);
 }
 
 static void show_lines(struct source_line *queue, int count, int total)
@@ -304,24 +332,24 @@ static void show_details(struct sym_entry *syme)
        if (!syme)
                return;
 
-       if (!syme->source)
+       if (!syme->src->source)
                lookup_sym_source(syme);
 
-       if (!syme->source)
+       if (!syme->src->source)
                return;
 
-       symbol = (struct symbol *)(syme + 1);
+       symbol = sym_entry__symbol(syme);
        printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
        printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);
 
-       pthread_mutex_lock(&syme->source_lock);
-       line = syme->source;
+       pthread_mutex_lock(&syme->src->lock);
+       line = syme->src->source;
        while (line) {
                total += line->count[sym_counter];
                line = line->next;
        }
 
-       line = syme->source;
+       line = syme->src->source;
        while (line) {
                float pcnt = 0.0;
 
@@ -346,7 +374,7 @@ static void show_details(struct sym_entry *syme)
                line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
                line = line->next;
        }
-       pthread_mutex_unlock(&syme->source_lock);
+       pthread_mutex_unlock(&syme->src->lock);
        if (more)
                printf("%d lines not displayed, maybe increase display entries [e]\n", more);
 }
@@ -423,6 +451,8 @@ static void print_sym_table(void)
        struct sym_entry *syme, *n;
        struct rb_root tmp = RB_ROOT;
        struct rb_node *nd;
+       int sym_width = 0, dso_width = 0, max_dso_width;
+       const int win_width = winsize.ws_col - 1;
 
        samples = userspace_samples = 0;
 
@@ -434,6 +464,7 @@ static void print_sym_table(void)
        list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
                syme->snap_count = syme->count[snap];
                if (syme->snap_count != 0) {
+
                        if ((hide_user_symbols &&
                             syme->origin == PERF_RECORD_MISC_USER) ||
                            (hide_kernel_symbols &&
@@ -453,8 +484,7 @@ static void print_sym_table(void)
 
        puts(CONSOLE_CLEAR);
 
-       printf(
-"------------------------------------------------------------------------------\n");
+       printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
        printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
                samples_per_sec,
                100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
@@ -492,13 +522,35 @@ static void print_sym_table(void)
                        printf(", %d CPUs)\n", nr_cpus);
        }
 
-       printf("------------------------------------------------------------------------------\n\n");
+       printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
 
        if (sym_filter_entry) {
                show_details(sym_filter_entry);
                return;
        }
 
+       /*
+        * Find the longest symbol name that will be displayed
+        */
+       for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
+               syme = rb_entry(nd, struct sym_entry, rb_node);
+               if (++printed > print_entries ||
+                   (int)syme->snap_count < count_filter)
+                       continue;
+
+               if (syme->map->dso->long_name_len > dso_width)
+                       dso_width = syme->map->dso->long_name_len;
+
+               if (syme->name_len > sym_width)
+                       sym_width = syme->name_len;
+       }
+
+       printed = 0;
+
+       max_dso_width = winsize.ws_col - sym_width - 29;
+       if (dso_width > max_dso_width)
+               dso_width = max_dso_width;
+       putchar('\n');
        if (nr_counters == 1)
                printf("             samples  pcnt");
        else
@@ -506,19 +558,21 @@ static void print_sym_table(void)
 
        if (verbose)
                printf("         RIP       ");
-       printf(" function                                 DSO\n");
+       printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
        printf("   %s    _______ _____",
               nr_counters == 1 ? "      " : "______");
        if (verbose)
                printf(" ________________");
-       printf(" ________________________________ ________________\n\n");
+       printf(" %-*.*s", sym_width, sym_width, graph_line);
+       printf(" %-*.*s", dso_width, dso_width, graph_line);
+       puts("\n");
 
        for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
                struct symbol *sym;
                double pcnt;
 
                syme = rb_entry(nd, struct sym_entry, rb_node);
-               sym = (struct symbol *)(syme + 1);
+               sym = sym_entry__symbol(syme);
 
                if (++printed > print_entries || (int)syme->snap_count < count_filter)
                        continue;
@@ -534,9 +588,11 @@ static void print_sym_table(void)
                percent_color_fprintf(stdout, "%4.1f%%", pcnt);
                if (verbose)
                        printf(" %016llx", sym->start);
-               printf(" %-32s", sym->name);
-               printf(" %s", syme->map->dso->short_name);
-               printf("\n");
+               printf(" %-*.*s", sym_width, sym_width, sym->name);
+               printf(" %-*.*s\n", dso_width, dso_width,
+                      dso_width >= syme->map->dso->long_name_len ?
+                                       syme->map->dso->long_name :
+                                       syme->map->dso->short_name);
        }
 }
 
@@ -583,10 +639,10 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
 
        /* zero counters of active symbol */
        if (syme) {
-               pthread_mutex_lock(&syme->source_lock);
+               pthread_mutex_lock(&syme->src->lock);
                __zero_source_counters(syme);
                *target = NULL;
-               pthread_mutex_unlock(&syme->source_lock);
+               pthread_mutex_unlock(&syme->src->lock);
        }
 
        fprintf(stdout, "\n%s: ", msg);
@@ -602,7 +658,7 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
        pthread_mutex_unlock(&active_symbols_lock);
 
        list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
-               struct symbol *sym = (struct symbol *)(syme + 1);
+               struct symbol *sym = sym_entry__symbol(syme);
 
                if (!strcmp(buf, sym->name)) {
                        found = syme;
@@ -626,7 +682,7 @@ static void print_mapped_keys(void)
        char *name = NULL;
 
        if (sym_filter_entry) {
-               struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
+               struct symbol *sym = sym_entry__symbol(sym_filter_entry);
                name = sym->name;
        }
 
@@ -639,7 +695,7 @@ static void print_mapped_keys(void)
 
        fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);
 
-       if (vmlinux_name) {
+       if (symbol_conf.vmlinux_name) {
                fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
                fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
                fprintf(stdout, "\t[S]     stop annotation.\n");
@@ -676,7 +732,7 @@ static int key_mapped(int c)
                case 'F':
                case 's':
                case 'S':
-                       return vmlinux_name ? 1 : 0;
+                       return symbol_conf.vmlinux_name ? 1 : 0;
                default:
                        break;
        }
@@ -718,7 +774,7 @@ static void handle_keypress(int c)
                case 'e':
                        prompt_integer(&print_entries, "Enter display entries (lines)");
                        if (print_entries == 0) {
-                               update_print_entries();
+                               sig_winch_handler(SIGWINCH);
                                signal(SIGWINCH, sig_winch_handler);
                        } else
                                signal(SIGWINCH, SIG_DFL);
@@ -752,6 +808,8 @@ static void handle_keypress(int c)
                case 'q':
                case 'Q':
                        printf("exiting.\n");
+                       if (dump_symtab)
+                               dsos__fprintf(stderr);
                        exit(0);
                case 's':
                        prompt_symbol(&sym_filter_entry, "Enter details symbol");
@@ -762,10 +820,10 @@ static void handle_keypress(int c)
                        else {
                                struct sym_entry *syme = sym_filter_entry;
 
-                               pthread_mutex_lock(&syme->source_lock);
+                               pthread_mutex_lock(&syme->src->lock);
                                sym_filter_entry = NULL;
                                __zero_source_counters(syme);
-                               pthread_mutex_unlock(&syme->source_lock);
+                               pthread_mutex_unlock(&syme->src->lock);
                        }
                        break;
                case 'U':
@@ -851,7 +909,7 @@ static int symbol_filter(struct map *map, struct symbol *sym)
 
        syme = symbol__priv(sym);
        syme->map = map;
-       pthread_mutex_init(&syme->source_lock, NULL);
+       syme->src = NULL;
        if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
                sym_filter_entry = syme;
 
@@ -862,16 +920,8 @@ static int symbol_filter(struct map *map, struct symbol *sym)
                }
        }
 
-       return 0;
-}
-
-static int parse_symbols(void)
-{
-       if (dsos__load_kernel(vmlinux_name, symbol_filter, 1) <= 0)
-               return -1;
-
-       if (dump_symtab)
-               dsos__fprintf(stderr);
+       if (!syme->skip)
+               syme->name_len = strlen(sym->name);
 
        return 0;
 }
@@ -879,55 +929,28 @@ static int parse_symbols(void)
 static void event__process_sample(const event_t *self, int counter)
 {
        u64 ip = self->ip.ip;
-       struct map *map;
        struct sym_entry *syme;
-       struct symbol *sym;
+       struct addr_location al;
        u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
        switch (origin) {
-       case PERF_RECORD_MISC_USER: {
-               struct thread *thread;
-
+       case PERF_RECORD_MISC_USER:
                if (hide_user_symbols)
                        return;
-
-               thread = threads__findnew(self->ip.pid);
-               if (thread == NULL)
-                       return;
-
-               map = thread__find_map(thread, ip);
-               if (map != NULL) {
-                       ip = map->map_ip(map, ip);
-                       sym = map__find_symbol(map, ip, symbol_filter);
-                       if (sym == NULL)
-                               return;
-                       userspace_samples++;
-                       break;
-               }
-       }
-               /*
-                * If this is outside of all known maps,
-                * and is a negative address, try to look it
-                * up in the kernel dso, as it might be a
-                * vsyscall or vdso (which executes in user-mode).
-                */
-               if ((long long)ip >= 0)
-                       return;
-               /* Fall thru */
+               break;
        case PERF_RECORD_MISC_KERNEL:
                if (hide_kernel_symbols)
                        return;
-
-               sym = kernel_maps__find_symbol(ip, &map);
-               if (sym == NULL)
-                       return;
                break;
        default:
                return;
        }
 
-       syme = symbol__priv(sym);
+       if (event__preprocess_sample(self, &al, symbol_filter) < 0 ||
+           al.sym == NULL)
+               return;
 
+       syme = symbol__priv(al.sym);
        if (!syme->skip) {
                syme->count[counter]++;
                syme->origin = origin;
@@ -936,30 +959,12 @@ static void event__process_sample(const event_t *self, int counter)
                if (list_empty(&syme->node) || !syme->node.next)
                        __list_insert_active_sym(syme);
                pthread_mutex_unlock(&active_symbols_lock);
+               if (origin == PERF_RECORD_MISC_USER)
+                       ++userspace_samples;
                ++samples;
-               return;
-       }
-}
-
-static void event__process_mmap(event_t *self)
-{
-       struct thread *thread = threads__findnew(self->mmap.pid);
-
-       if (thread != NULL) {
-               struct map *map = map__new(&self->mmap, NULL, 0);
-               if (map != NULL)
-                       thread__insert_map(thread, map);
        }
 }
 
-static void event__process_comm(event_t *self)
-{
-       struct thread *thread = threads__findnew(self->comm.pid);
-
-       if (thread != NULL)
-               thread__set_comm(thread, self->comm.comm);
-}
-
 static int event__process(event_t *event)
 {
        switch (event->header.type) {
@@ -1211,7 +1216,8 @@ static const struct option options[] = {
                            "system-wide collection from all CPUs"),
        OPT_INTEGER('C', "CPU", &profile_cpu,
                    "CPU to profile on"),
-       OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
+       OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+                  "file", "vmlinux pathname"),
        OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
                    "hide kernel symbols"),
        OPT_INTEGER('m', "mmap-pages", &mmap_pages,
@@ -1247,8 +1253,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 {
        int counter;
 
-       symbol__init(sizeof(struct sym_entry));
-
        page_size = sysconf(_SC_PAGE_SIZE);
 
        argc = parse_options(argc, argv, options, top_usage, 0);
@@ -1265,13 +1269,18 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
        if (!nr_counters)
                nr_counters = 1;
 
+       symbol_conf.priv_size = (sizeof(struct sym_entry) +
+                                (nr_counters + 1) * sizeof(unsigned long));
+       if (symbol_conf.vmlinux_name == NULL)
+               symbol_conf.try_vmlinux_path = true;
+       if (symbol__init(&symbol_conf) < 0)
+               return -1;
+
        if (delay_secs < 1)
                delay_secs = 1;
 
-       parse_symbols();
        parse_source(sym_filter_entry);
 
-
        /*
         * User specified count overrides default frequency.
         */
@@ -1301,8 +1310,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
        if (target_pid != -1 || profile_cpu != -1)
                nr_cpus = 1;
 
+       get_term_dimensions(&winsize);
        if (print_entries == 0) {
-               update_print_entries();
+               update_print_entries(&winsize);
                signal(SIGWINCH, sig_winch_handler);
        }
 
index d042d656c5616d5a17cd509a079a5809902bb036..abb914aa7be62e13c18fa6f7eea3c268d295c65d 100644 (file)
@@ -5,49 +5,66 @@
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/header.h"
+#include "util/exec_cmd.h"
+#include "util/trace-event.h"
 
-#include "util/parse-options.h"
+static char const              *script_name;
+static char const              *generate_script_lang;
 
-#include "perf.h"
-#include "util/debug.h"
+static int default_start_script(const char *script __attribute((unused)))
+{
+       return 0;
+}
 
-#include "util/trace-event.h"
-#include "util/data_map.h"
+static int default_stop_script(void)
+{
+       return 0;
+}
 
-static char            const *input_name = "perf.data";
+static int default_generate_script(const char *outfile __attribute ((unused)))
+{
+       return 0;
+}
 
-static unsigned long   total = 0;
-static unsigned long   total_comm = 0;
+static struct scripting_ops default_scripting_ops = {
+       .start_script           = default_start_script,
+       .stop_script            = default_stop_script,
+       .process_event          = print_event,
+       .generate_script        = default_generate_script,
+};
+
+static struct scripting_ops    *scripting_ops;
 
-static struct perf_header *header;
-static u64             sample_type;
+static void setup_scripting(void)
+{
+       /* make sure PERF_EXEC_PATH is set for scripts */
+       perf_set_argv_exec_path(perf_exec_path());
 
-static char            *cwd;
-static int             cwdlen;
+       setup_perl_scripting();
 
+       scripting_ops = &default_scripting_ops;
+}
 
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+static int cleanup_scripting(void)
 {
-       struct thread *thread = threads__findnew(event->comm.pid);
+       return scripting_ops->stop_script();
+}
 
-       dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->comm.comm, event->comm.pid);
+#include "util/parse-options.h"
 
-       if (thread == NULL ||
-           thread__set_comm(thread, event->comm.comm)) {
-               dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
-               return -1;
-       }
-       total_comm++;
+#include "perf.h"
+#include "util/debug.h"
 
-       return 0;
-}
+#include "util/trace-event.h"
+#include "util/data_map.h"
+#include "util/exec_cmd.h"
+
+static char const              *input_name = "perf.data";
 
-static int
-process_sample_event(event_t *event, unsigned long offset, unsigned long head)
+static struct perf_header      *header;
+static u64                     sample_type;
+
+static int process_sample_event(event_t *event)
 {
        u64 ip = event->ip.ip;
        u64 timestamp = -1;
@@ -72,9 +89,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                more_data += sizeof(u64);
        }
 
-       dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
+       dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
                event->header.misc,
                event->ip.pid, event->ip.tid,
                (void *)(long)ip,
@@ -99,9 +114,10 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                 * field, although it should be the same than this perf
                 * event pid
                 */
-               print_event(cpu, raw->data, raw->size, timestamp, thread->comm);
+               scripting_ops->process_event(cpu, raw->data, raw->size,
+                                            timestamp, thread->comm);
        }
-       total += period;
+       event__stats.total += period;
 
        return 0;
 }
@@ -122,7 +138,7 @@ static int sample_type_check(u64 type)
 
 static struct perf_file_handler file_handler = {
        .process_sample_event   = process_sample_event,
-       .process_comm_event     = process_comm_event,
+       .process_comm_event     = event__process_comm,
        .sample_type_check      = sample_type_check,
 };
 
@@ -131,7 +147,156 @@ static int __cmd_trace(void)
        register_idle_thread();
        register_perf_file_handler(&file_handler);
 
-       return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
+       return mmap_dispatch_perf_file(&header, input_name,
+                                      0, 0, &event__cwdlen, &event__cwd);
+}
+
+/*
+ * One registered scripting-language specifier and the ops that handle it.
+ * Entries are linked on the script_specs list through @node.  The specifier
+ * string is stored in the trailing spec[] array, i.e. in the same
+ * allocation as the struct itself (see script_spec__new()).
+ */
+struct script_spec {
+       struct list_head        node;
+       struct scripting_ops    *ops;
+       char                    spec[0];
+};
+
+LIST_HEAD(script_specs);
+
+/*
+ * Allocate a script_spec for @spec bound to @ops.
+ *
+ * The struct and the copy of the specifier string come from a single
+ * malloc() sized for both.  The new entry is not linked onto script_specs
+ * here.  Returns NULL if the allocation fails.
+ */
+static struct script_spec *script_spec__new(const char *spec,
+                                           struct scripting_ops *ops)
+{
+       struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
+
+       if (s != NULL) {
+               strcpy(s->spec, spec);
+               s->ops = ops;
+       }
+
+       return s;
+}
+
+/*
+ * Release a script_spec obtained from script_spec__new().
+ *
+ * The specifier string lives in the trailing spec[] array of the same
+ * allocation, so it must never be handed to free() on its own: one free()
+ * of @s releases both.  Passing NULL is a harmless no-op, which the
+ * allocation-failure path of script_spec__findnew() depends on.
+ */
+static void script_spec__delete(struct script_spec *s)
+{
+       free(s);
+}
+
+/* Append @s to the tail of the script_specs list. */
+static void script_spec__add(struct script_spec *s)
+{
+       list_add_tail(&s->node, &script_specs);
+}
+
+/*
+ * Search script_specs for an entry whose specifier equals @spec, compared
+ * case-insensitively.  Returns the first match, or NULL if there is none.
+ */
+static struct script_spec *script_spec__find(const char *spec)
+{
+       struct script_spec *s;
+
+       list_for_each_entry(s, &script_specs, node)
+               if (strcasecmp(s->spec, spec) == 0)
+                       return s;
+       return NULL;
+}
+
+/*
+ * Return the entry already registered for @spec; otherwise create one bound
+ * to @ops, link it onto script_specs and return it.
+ *
+ * NOTE(review): when script_spec__new() fails, @s is NULL at the
+ * out_delete_spec label, so script_spec__delete() is called with a NULL
+ * pointer before NULL is returned - that helper has to tolerate NULL.
+ */
+static struct script_spec *script_spec__findnew(const char *spec,
+                                               struct scripting_ops *ops)
+{
+       struct script_spec *s = script_spec__find(spec);
+
+       if (s)
+               return s;
+
+       s = script_spec__new(spec, ops);
+       if (!s)
+               goto out_delete_spec;
+
+       script_spec__add(s);
+
+       return s;
+
+out_delete_spec:
+       script_spec__delete(s);
+
+       return NULL;
+}
+
+/*
+ * Register @ops as the handler for the language specifier @spec.
+ *
+ * Returns 0 once a new entry has been created, or -1 if @spec is already
+ * registered or script_spec__findnew() returns NULL.
+ */
+int script_spec_register(const char *spec, struct scripting_ops *ops)
+{
+       struct script_spec *s;
+
+       /* Refuse duplicates: an existing entry is never replaced. */
+       s = script_spec__find(spec);
+       if (s)
+               return -1;
+
+       s = script_spec__findnew(spec, ops);
+       if (!s)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Return the scripting_ops registered for @spec, or NULL if no entry
+ * matches.
+ */
+static struct scripting_ops *script_spec__lookup(const char *spec)
+{
+       struct script_spec *s = script_spec__find(spec);
+       if (!s)
+               return NULL;
+
+       return s->ops;
+}
+
+/*
+ * Print to stderr a header followed by one line per entry on script_specs,
+ * showing the specifier and the name of its scripting_ops.
+ */
+static void list_available_languages(void)
+{
+       struct script_spec *s;
+
+       fprintf(stderr, "\n");
+       fprintf(stderr, "Scripting language extensions (used in "
+               "perf trace -s [spec:]script.[spec]):\n\n");
+
+       list_for_each_entry(s, &script_specs, node)
+               fprintf(stderr, "  %-42s [%s]\n", s->spec, s->ops->name);
+
+       fprintf(stderr, "\n");
+}
+
+/*
+ * Option callback for -s/--script.
+ *
+ * @str is one of:
+ *   "list"         - print the registered language specifiers;
+ *   "lang:script"  - explicit language specifier before the ':';
+ *   "script.ext"   - language taken from the script's file extension.
+ *
+ * On success the matching ops are stored in scripting_ops and a heap copy
+ * of the script name in script_name.
+ *
+ * Returns 0 on success, -1 if the specifier or extension is missing or not
+ * registered, or if the script name cannot be duplicated.
+ */
+static int parse_scriptname(const struct option *opt __used,
+                           const char *str, int unset __used)
+{
+       char spec[PATH_MAX];
+       const char *script, *ext;
+       int len;
+
+       if (strcmp(str, "list") == 0) {
+               list_available_languages();
+               return 0;
+       }
+
+       script = strchr(str, ':');
+       if (script) {
+               len = script - str;
+               if (len >= PATH_MAX) {
+                       fprintf(stderr, "invalid language specifier\n");
+                       return -1;
+               }
+               /* str[0..len) precedes the ':' and so holds no NUL byte */
+               memcpy(spec, str, len);
+               spec[len] = '\0';
+               scripting_ops = script_spec__lookup(spec);
+               if (!scripting_ops) {
+                       fprintf(stderr, "invalid language specifier\n");
+                       return -1;
+               }
+               script++;
+       } else {
+               script = str;
+               /*
+                * The extension follows the LAST '.', so that names such
+                * as "./foo.pl" or "dir.d/foo.pl" resolve to "pl".
+                */
+               ext = strrchr(script, '.');
+               if (!ext) {
+                       fprintf(stderr, "invalid script extension\n");
+                       return -1;
+               }
+               scripting_ops = script_spec__lookup(++ext);
+               if (!scripting_ops) {
+                       fprintf(stderr, "invalid script extension\n");
+                       return -1;
+               }
+       }
+
+       script_name = strdup(script);
+       if (!script_name) {
+               /* don't leave ops selected without a script to start */
+               fprintf(stderr, "failed to allocate script name\n");
+               return -1;
+       }
+
+       return 0;
+}
 
 static const char * const annotate_usage[] = {
@@ -146,13 +311,23 @@ static const struct option options[] = {
                    "be more verbose (show symbol address, etc)"),
        OPT_BOOLEAN('l', "latency", &latency_format,
                    "show latency attributes (irqs/preemption disabled, etc)"),
+       OPT_CALLBACK('s', "script", NULL, "name",
+                    "script file name (lang:script name, script name, or *)",
+                    parse_scriptname),
+       OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
+                  "generate perf-trace.xx script in specified language"),
+
        OPT_END()
 };
 
 int cmd_trace(int argc, const char **argv, const char *prefix __used)
 {
+       int err;
+
        symbol__init(0);
 
+       setup_scripting();
+
        argc = parse_options(argc, argv, options, annotate_usage, 0);
        if (argc) {
                /*
@@ -165,5 +340,50 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 
        setup_pager();
 
-       return __cmd_trace();
+       if (generate_script_lang) {
+               struct stat perf_stat;
+
+               int input = open(input_name, O_RDONLY);
+               if (input < 0) {
+                       perror("failed to open file");
+                       exit(-1);
+               }
+
+               err = fstat(input, &perf_stat);
+               if (err < 0) {
+                       perror("failed to stat file");
+                       exit(-1);
+               }
+
+               if (!perf_stat.st_size) {
+                       fprintf(stderr, "zero-sized file, nothing to do!\n");
+                       exit(0);
+               }
+
+               scripting_ops = script_spec__lookup(generate_script_lang);
+               if (!scripting_ops) {
+                       fprintf(stderr, "invalid language specifier");
+                       return -1;
+               }
+
+               header = perf_header__new();
+               if (header == NULL)
+                       return -1;
+
+               perf_header__read(header, input);
+               err = scripting_ops->generate_script("perf-trace");
+               goto out;
+       }
+
+       if (script_name) {
+               err = scripting_ops->start_script(script_name);
+               if (err)
+                       goto out;
+       }
+
+       err = __cmd_trace();
+
+       cleanup_scripting();
+out:
+       return err;
 }
index 9b02d85091fe7b59a955830fefb653278bd3e86f..a3d8bf65f26c2a0466bd76ab02d5e48db02ce0dc 100644 (file)
@@ -28,5 +28,6 @@ extern int cmd_top(int argc, const char **argv, const char *prefix);
 extern int cmd_trace(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
 extern int cmd_probe(int argc, const char **argv, const char *prefix);
+extern int cmd_kmem(int argc, const char **argv, const char *prefix);
 
 #endif
index d3a6e18e4a5e2fcd9a4c38a1f969c6d622d40e46..02b09ea17a3ecad604cd970edf28b386fb22a15b 100644 (file)
@@ -14,3 +14,4 @@ perf-timechart                        mainporcelain common
 perf-top                       mainporcelain common
 perf-trace                     mainporcelain common
 perf-probe                     mainporcelain common
+perf-kmem                      mainporcelain common
index 89b82acac7d9d9800f63088a530029e95c1e601b..cf64049bc9bdd71e4c97c68a59e5b754bf9311ad 100644 (file)
@@ -285,20 +285,21 @@ static void handle_internal_command(int argc, const char **argv)
 {
        const char *cmd = argv[0];
        static struct cmd_struct commands[] = {
-               { "help", cmd_help, 0 },
-               { "list", cmd_list, 0 },
                { "buildid-list", cmd_buildid_list, 0 },
-               { "record", cmd_record, 0 },
-               { "report", cmd_report, 0 },
-               { "bench", cmd_bench, 0 },
-               { "stat", cmd_stat, 0 },
-               { "timechart", cmd_timechart, 0 },
-               { "top", cmd_top, 0 },
-               { "annotate", cmd_annotate, 0 },
-               { "version", cmd_version, 0 },
-               { "trace", cmd_trace, 0 },
-               { "sched", cmd_sched, 0 },
-               { "probe", cmd_probe, 0 },
+               { "help",       cmd_help,       0 },
+               { "list",       cmd_list,       0 },
+               { "record",     cmd_record,     0 },
+               { "report",     cmd_report,     0 },
+               { "bench",      cmd_bench,      0 },
+               { "stat",       cmd_stat,       0 },
+               { "timechart",  cmd_timechart,  0 },
+               { "top",        cmd_top,        0 },
+               { "annotate",   cmd_annotate,   0 },
+               { "version",    cmd_version,    0 },
+               { "trace",      cmd_trace,      0 },
+               { "sched",      cmd_sched,      0 },
+               { "probe",      cmd_probe,      0 },
+               { "kmem",       cmd_kmem,       0 },
        };
        unsigned int i;
        static const char ext[] = STRIP_EXTENSION;
index 216bdb223f63e5d6b70d7e2c815318d0248ca853..454d5d55f32d9cb30d8206c6f03bfe1e0b5f61fb 100644 (file)
 #define cpu_relax()    asm volatile("" ::: "memory")
 #endif
 
+#ifdef __ia64__
+#include "../../arch/ia64/include/asm/unistd.h"
+#define rmb()          asm volatile ("mf" ::: "memory")
+#define cpu_relax()    asm volatile ("hint @pause" ::: "memory")
+#endif
+
 #include <time.h>
 #include <unistd.h>
 #include <sys/types.h>
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
new file mode 100644 (file)
index 0000000..af78d9a
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * This file was generated automatically by ExtUtils::ParseXS version 2.18_02 from the
+ * contents of Context.xs. Do not edit this file, edit Context.xs instead.
+ *
+ *     ANY CHANGES MADE HERE WILL BE LOST! 
+ *
+ */
+
+#line 1 "Context.xs"
+/*
+ * Context.xs.  XS interfaces for perf trace.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "../../../util/trace-event-perl.h"
+
+#ifndef PERL_UNUSED_VAR
+#  define PERL_UNUSED_VAR(var) if (0) var = var
+#endif
+
+#line 41 "Context.c"
+
+XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_pc)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_pc", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+       struct scripting_context *      context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+       int     RETVAL;
+       dXSTARG;
+
+       RETVAL = common_pc(context);
+       XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+
+XS(XS_Perf__Trace__Context_common_flags); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_flags)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_flags", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+       struct scripting_context *      context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+       int     RETVAL;
+       dXSTARG;
+
+       RETVAL = common_flags(context);
+       XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+
+XS(XS_Perf__Trace__Context_common_lock_depth); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_lock_depth)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_lock_depth", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+       struct scripting_context *      context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+       int     RETVAL;
+       dXSTARG;
+
+       RETVAL = common_lock_depth(context);
+       XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+#ifdef __cplusplus
+extern "C"
+#endif
+XS(boot_Perf__Trace__Context); /* prototype to pass -Wmissing-prototypes */
+XS(boot_Perf__Trace__Context)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    const char* file = __FILE__;
+
+    PERL_UNUSED_VAR(cv); /* -W */
+    PERL_UNUSED_VAR(items); /* -W */
+    XS_VERSION_BOOTCHECK ;
+
+        newXSproto("Perf::Trace::Context::common_pc", XS_Perf__Trace__Context_common_pc, file, "$");
+        newXSproto("Perf::Trace::Context::common_flags", XS_Perf__Trace__Context_common_flags, file, "$");
+        newXSproto("Perf::Trace::Context::common_lock_depth", XS_Perf__Trace__Context_common_lock_depth, file, "$");
+    if (PL_unitcheckav)
+         call_list(PL_scopestack_ix, PL_unitcheckav);
+    XSRETURN_YES;
+}
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
new file mode 100644 (file)
index 0000000..fb78006
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Context.xs.  XS interfaces for perf trace.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "../../../util/trace-event-perl.h"
+
+MODULE = Perf::Trace::Context          PACKAGE = Perf::Trace::Context
+PROTOTYPES: ENABLE
+
+int
+common_pc(context)
+       struct scripting_context * context
+
+int
+common_flags(context)
+       struct scripting_context * context
+
+int
+common_lock_depth(context)
+       struct scripting_context * context
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL
new file mode 100644 (file)
index 0000000..decdeb0
--- /dev/null
@@ -0,0 +1,17 @@
+use 5.010000;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+    NAME              => 'Perf::Trace::Context',
+    VERSION_FROM      => 'lib/Perf/Trace/Context.pm', # finds $VERSION
+    PREREQ_PM         => {}, # e.g., Module::Name => 1.1
+    ($] >= 5.005 ?     ## Add these new keywords supported since 5.005
+      (ABSTRACT_FROM  => 'lib/Perf/Trace/Context.pm', # retrieve abstract from module
+       AUTHOR         => 'Tom Zanussi <tzanussi@gmail.com>') : ()),
+    LIBS              => [''], # e.g., '-lm'
+    DEFINE            => '-I ../..', # e.g., '-DHAVE_SOMETHING'
+    INC               => '-I.', # e.g., '-I. -I/usr/include/other'
+       # Un-comment this if you add C files to link with later:
+    OBJECT            => 'Context.o', # link all the C files too
+);
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README
new file mode 100644 (file)
index 0000000..9a97076
--- /dev/null
@@ -0,0 +1,59 @@
+Perf-Trace-Util version 0.01
+============================
+
+This module contains utility functions for use with perf trace.
+
+Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
+that the core perf support for Perl calls on and should always be
+'used', while Util.pm contains useful but optional utility functions
+that scripts may want to use.  Context.pm contains the Perl->C
+interface that allows scripts to access data in the embedding perf
+executable; scripts wishing to do that should 'use Context.pm'.
+
+The Perl->C perf interface is completely driven by Context.xs.  If you
+want to add new Perl functions that end up accessing C data in the
+perf executable, you add descriptions of the new functions here.
+scripting_context is a pointer to the perf data in the perf executable
+that you want to access - it's passed as the second parameter,
+$context, to all handler functions.
+
+After you do that:
+
+  perl Makefile.PL   # to create a Makefile for the next step
+  make               # to create Context.c
+
+  edit Context.c to add const to the char* file = __FILE__ line in
+  XS(boot_Perf__Trace__Context) to silence a warning/error.
+
+  You can delete the Makefile, object files and anything else that was
+  generated e.g. blib and shared library, etc, except for of course
+  Context.c
+
+  You should then be able to run the normal perf make as usual.
+
+INSTALLATION
+
+Building perf with perf trace Perl scripting should install this
+module in the right place.
+
+You should make sure libperl and ExtUtils/Embed.pm are installed first
+e.g. apt-get install libperl-dev or yum install perl-ExtUtils-Embed.
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+  None
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2009 by Tom Zanussi <tzanussi@gmail.com>
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
new file mode 100644 (file)
index 0000000..6c7f365
--- /dev/null
@@ -0,0 +1,55 @@
+package Perf::Trace::Context;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+       common_pc common_flags common_lock_depth
+);
+
+our $VERSION = '0.01';
+
+require XSLoader;
+XSLoader::load('Perf::Trace::Context', $VERSION);
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Context - Perl extension for accessing functions in perf.
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Context;
+
+=head1 SEE ALSO
+
+Perf (trace) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
new file mode 100644 (file)
index 0000000..9df376a
--- /dev/null
@@ -0,0 +1,192 @@
+package Perf::Trace::Core;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+define_flag_field define_flag_value flag_str dump_flag_fields
+define_symbolic_field define_symbolic_value symbol_str dump_symbolic_fields
+trace_flag_str
+);
+
+our $VERSION = '0.01';
+
+my %trace_flags = (0x00 => "NONE",
+                  0x01 => "IRQS_OFF",
+                  0x02 => "IRQS_NOSUPPORT",
+                  0x04 => "NEED_RESCHED",
+                  0x08 => "HARDIRQ",
+                  0x10 => "SOFTIRQ");
+
+sub trace_flag_str
+{
+    my ($value) = @_;
+    # Render a flags bitmask as the names of its set bits joined by " | ".
+    my $string;
+    # $string stays undef if $value is non-zero but matches no known flag.
+    my $print_delim = 0;
+    # Numeric sort visits key 0 first, so a false $value yields just "NONE".
+    foreach my $idx (sort {$a <=> $b} keys %trace_flags) {
+       if (!$value && !$idx) {
+           $string .= "NONE";
+           last;
+       }
+
+       if ($idx && ($value & $idx) == $idx) {
+           if ($print_delim) {
+               $string .= " | ";
+           }
+           $string .= "$trace_flags{$idx}";
+           $print_delim = 1;
+           $value &= ~$idx;
+       }
+    }
+
+    return $string;
+}
+
+my %flag_fields;
+my %symbolic_fields;
+
+sub flag_str
+{
+    my ($event_name, $field_name, $value) = @_;
+    # Decode $value using the bit names registered for this event field.
+    my $string;
+    # Returns undef if the field has no entry or no registered bit matches.
+    if ($flag_fields{$event_name}{$field_name}) {
+       my $print_delim = 0;
+       foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event_name}{$field_name}{"values"}}) {
+           if (!$value && !$idx) {
+               $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
+               last;
+           }
+           if ($idx && ($value & $idx) == $idx) {
+               if ($print_delim && $flag_fields{$event_name}{$field_name}{'delim'}) {
+                   $string .= " $flag_fields{$event_name}{$field_name}{'delim'} ";
+               }
+               $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
+               $print_delim = 1;
+               $value &= ~$idx;
+           }
+       }
+    }
+
+    return $string;
+}
+
+sub define_flag_field
+{
+    my ($event_name, $field_name, $delim) = @_;
+
+    $flag_fields{$event_name}{$field_name}{"delim"} = $delim;
+}
+
+sub define_flag_value
+{
+    my ($event_name, $field_name, $value, $field_str) = @_;
+
+    $flag_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
+}
+
+sub dump_flag_fields
+{
+    for my $event (keys %flag_fields) {
+       print "event $event:\n";
+       for my $field (keys %{$flag_fields{$event}}) {
+           print "    field: $field:\n";
+           print "        delim: $flag_fields{$event}{$field}{'delim'}\n";
+           foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event}{$field}{"values"}}) {
+               print "        value $idx: $flag_fields{$event}{$field}{'values'}{$idx}\n";
+           }
+       }
+    }
+}
+
+# Map $value to the symbolic name registered for this event field via
+# define_symbolic_value(); returns undef when nothing is registered for it.
+sub symbol_str
+{
+    my ($event_name, $field_name, $value) = @_;
+
+    if ($symbolic_fields{$event_name}{$field_name}) {
+       foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event_name}{$field_name}{"values"}}) {
+           # The falsy test comes first so an undef/empty $value can match
+           # key 0 without going through the numeric comparison.
+           if ((!$value && !$idx) || $value == $idx) {
+               return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}";
+           }
+       }
+    }
+
+    return undef;
+}
+
+sub define_symbolic_field
+{
+    my ($event_name, $field_name) = @_;
+
+    # nothing to do, really
+}
+
+sub define_symbolic_value
+{
+    my ($event_name, $field_name, $value, $field_str) = @_;
+
+    $symbolic_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
+}
+
+sub dump_symbolic_fields
+{
+    for my $event (keys %symbolic_fields) {
+       print "event $event:\n";
+       for my $field (keys %{$symbolic_fields{$event}}) {
+           print "    field: $field:\n";
+           foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event}{$field}{"values"}}) {
+               print "        value $idx: $symbolic_fields{$event}{$field}{'values'}{$idx}\n";
+           }
+       }
+    }
+}
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Core - Perl extension for perf trace
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Core;
+
+=head1 SEE ALSO
+
+Perf (trace) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
new file mode 100644 (file)
index 0000000..052f132
--- /dev/null
@@ -0,0 +1,88 @@
+package Perf::Trace::Util;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+avg nsecs nsecs_secs nsecs_nsecs nsecs_usecs print_nsecs
+);
+
+our $VERSION = '0.01';
+
+sub avg
+{
+    my ($total, $n) = @_;
+    # Mean of $total over $n samples; an empty set (0/undef $n) yields 0, not a die.
+    return $n ? $total / $n : 0;
+}
+
+my $NSECS_PER_SEC    = 1000000000;
+
+sub nsecs
+{
+    # Fold a (seconds, nanoseconds) timestamp pair into one nanosecond count.
+    my ($whole, $frac) = @_;
+    return $frac + $whole * $NSECS_PER_SEC;
+}
+
+sub nsecs_secs {
+    # Plain division: the result keeps any fractional part.
+    my ($total) = @_;
+    return $total / $NSECS_PER_SEC;
+}
+
+sub nsecs_nsecs {
+    my ($nsecs) = @_;
+    # Nanoseconds left over after the whole seconds (the "%09u" part of a timestamp).
+    return $nsecs % $NSECS_PER_SEC;
+}
+
+sub nsecs_str {
+    # Format a nanosecond count as "seconds.nanoseconds" via the two helpers.
+    my ($total) = @_;
+    my $whole = nsecs_secs($total);
+    my $frac = nsecs_nsecs($total);
+    return sprintf("%5u.%09u", $whole, $frac);
+}
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Util - Perl extension for perf trace
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Util;
+
+=head1 SEE ALSO
+
+Perf (trace) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/typemap b/tools/perf/scripts/perl/Perf-Trace-Util/typemap
new file mode 100644 (file)
index 0000000..8408368
--- /dev/null
@@ -0,0 +1 @@
+struct scripting_context * T_PTR
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record
new file mode 100644 (file)
index 0000000..c7ec5de
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry
+
+
+
+
+
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-report b/tools/perf/scripts/perl/bin/check-perf-trace-report
new file mode 100644 (file)
index 0000000..89948b0
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/check-perf-trace.pl
+
+
+
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-record b/tools/perf/scripts/perl/bin/rw-by-file-record
new file mode 100644 (file)
index 0000000..b25056e
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
new file mode 100644 (file)
index 0000000..f5dcf9c
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl
+
+
+
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-record b/tools/perf/scripts/perl/bin/rw-by-pid-record
new file mode 100644 (file)
index 0000000..8903979
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
new file mode 100644 (file)
index 0000000..cea16f7
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl
+
+
+
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-record b/tools/perf/scripts/perl/bin/wakeup-latency-record
new file mode 100644 (file)
index 0000000..6abedda
--- /dev/null
@@ -0,0 +1,6 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup
+
+
+
+
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
new file mode 100644 (file)
index 0000000..85769dc
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl
+
+
+
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record
new file mode 100644 (file)
index 0000000..fce6637
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
new file mode 100644 (file)
index 0000000..aa68435
--- /dev/null
@@ -0,0 +1,6 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl
+
+
+
+
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
new file mode 100644 (file)
index 0000000..4e7dc0a
--- /dev/null
@@ -0,0 +1,106 @@
+# perf trace event handlers, generated by perf trace -g perl
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# This script tests basic functionality such as flag and symbol
+# strings, common_xxx() calls back into perf, begin, end, unhandled
+# events, etc.  Basically, if this script runs successfully and
+# displays expected results, perl scripting support should be ok.
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Context;
+use Perf::Trace::Util;
+
+sub trace_begin
+{
+    print "trace_begin\n";
+}
+
+sub trace_end
+{
+    print "trace_end\n";
+
+    print_unhandled();
+}
+
+sub irq::softirq_entry
+{
+       my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+           $common_pid, $common_comm,
+           $vec) = @_;
+
+       print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
+                    $common_pid, $common_comm);
+
+       print_uncommon($context);
+
+       printf("vec=%s\n",
+              symbol_str("irq::softirq_entry", "vec", $vec));
+}
+
+sub kmem::kmalloc
+{
+       my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+           $common_pid, $common_comm,
+           $call_site, $ptr, $bytes_req, $bytes_alloc,
+           $gfp_flags) = @_;
+       # Handler for kmem:kmalloc: print the common header, then the event's own fields.
+       print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
+                    $common_pid, $common_comm);
+
+       print_uncommon($context);
+
+       # Perl's %p prints the address of the Perl scalar itself, so format the values as hex.
+       printf("call_site=0x%x, ptr=0x%x, bytes_req=%u, bytes_alloc=%u, ".
+              "gfp_flags=%s\n",
+              $call_site, $ptr, $bytes_req, $bytes_alloc,
+              flag_str("kmem::kmalloc", "gfp_flags", $gfp_flags));
+}
+
+# print trace fields not included in handler args
+sub print_uncommon
+{
+    my ($context) = @_;
+
+    printf("common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, ",
+          common_pc($context), trace_flag_str(common_flags($context)),
+          common_lock_depth($context));
+
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+       return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+          "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+       printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
+
+sub print_header
+{
+       my ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;
+
+       printf("%-20s %5u %05u.%09u %8u %-20s ",
+              $event_name, $cpu, $secs, $nsecs, $pid, $comm);
+}
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
new file mode 100644 (file)
index 0000000..61f9156
--- /dev/null
@@ -0,0 +1,105 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display r/w activity for files read/written to for a given program
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+# change this to the comm of the program you're interested in
+my $for_comm = "perf";
+
+my %reads;
+my %writes;
+
+sub syscalls::sys_enter_read
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+    if ($common_comm eq $for_comm) {
+       $reads{$fd}{bytes_requested} += $count;
+       $reads{$fd}{total_reads}++;
+    }
+}
+
+sub syscalls::sys_enter_write
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+    if ($common_comm eq $for_comm) {
+       $writes{$fd}{bytes_written} += $count;
+       $writes{$fd}{total_writes}++;
+    }
+}
+
+sub trace_end
+{
+    printf("file read counts for $for_comm:\n\n");
+
+    printf("%6s  %10s  %10s\n", "fd", "# reads", "bytes_requested");
+    printf("%6s  %10s  %10s\n", "------", "----------", "-----------");
+
+    foreach my $fd (sort {$reads{$b}{bytes_requested} <=>
+                             $reads{$a}{bytes_requested}} keys %reads) {
+       my $total_reads = $reads{$fd}{total_reads};
+       my $bytes_requested = $reads{$fd}{bytes_requested};
+       printf("%6u  %10u  %10u\n", $fd, $total_reads, $bytes_requested);
+    }
+
+    printf("\nfile write counts for $for_comm:\n\n");
+
+    printf("%6s  %10s  %10s\n", "fd", "# writes", "bytes_written");
+    printf("%6s  %10s  %10s\n", "------", "----------", "-----------");
+
+    foreach my $fd (sort {$writes{$b}{bytes_written} <=>
+                             $writes{$a}{bytes_written}} keys %writes) {
+       my $total_writes = $writes{$fd}{total_writes};
+       my $bytes_written = $writes{$fd}{bytes_written};
+       printf("%6u  %10u  %10u\n", $fd, $total_writes, $bytes_written);
+    }
+
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+       return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+          "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+       printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
+
+
diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl
new file mode 100644 (file)
index 0000000..da601fa
--- /dev/null
@@ -0,0 +1,170 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display r/w activity for all processes
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my %reads;
+my %writes;
+
+sub syscalls::sys_exit_read
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $nr, $ret) = @_;
+
+    if ($ret > 0) {
+       $reads{$common_pid}{bytes_read} += $ret;
+    } else {
+       if (!defined ($reads{$common_pid}{bytes_read})) {
+           $reads{$common_pid}{bytes_read} = 0;
+       }
+       $reads{$common_pid}{errors}{$ret}++;
+    }
+}
+
+sub syscalls::sys_enter_read
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $nr, $fd, $buf, $count) = @_;
+
+    $reads{$common_pid}{bytes_requested} += $count;
+    $reads{$common_pid}{total_reads}++;
+    $reads{$common_pid}{comm} = $common_comm;
+}
+
+sub syscalls::sys_exit_write
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $nr, $ret) = @_;
+
+    if ($ret <= 0) {
+       $writes{$common_pid}{errors}{$ret}++;
+    }
+}
+
+sub syscalls::sys_enter_write
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $nr, $fd, $buf, $count) = @_;
+
+    $writes{$common_pid}{bytes_written} += $count;
+    $writes{$common_pid}{total_writes}++;
+    $writes{$common_pid}{comm} = $common_comm;
+}
+
+sub trace_end
+{
+    # Print the four per-pid tables: reads, failed reads, writes, failed writes.
+    printf("read counts by pid:\n\n");
+    printf("%6s  %20s  %10s  %10s  %10s\n", "pid", "comm",
+          "# reads", "bytes_requested", "bytes_read");
+    printf("%6s  %-20s  %10s  %10s  %10s\n", "------", "--------------------",
+          "-----------", "----------", "----------");
+    # A pid seen on only one side of a syscall lacks some fields; default them.
+    foreach my $pid (sort {($reads{$b}{bytes_read} || 0) <=>
+                              ($reads{$a}{bytes_read} || 0)} keys %reads) {
+       my $comm = $reads{$pid}{comm} || "";
+       my $total_reads = $reads{$pid}{total_reads} || 0;
+       my $bytes_requested = $reads{$pid}{bytes_requested} || 0;
+       my $bytes_read = $reads{$pid}{bytes_read} || 0;
+
+       printf("%6s  %-20s  %10s  %10s  %10s\n", $pid, $comm,
+              $total_reads, $bytes_requested, $bytes_read);
+    }
+
+    printf("\nfailed reads by pid:\n\n");
+
+    printf("%6s  %20s  %6s  %10s\n", "pid", "comm", "error #", "# errors");
+    printf("%6s  %20s  %6s  %10s\n", "------", "--------------------",
+          "------", "----------");
+    # Within each pid, list the most frequent error first.
+    foreach my $pid (keys %reads) {
+       my $comm = $reads{$pid}{comm} || "";
+       foreach my $err (sort {$reads{$pid}{errors}{$b} <=>
+                                  $reads{$pid}{errors}{$a}}
+                        keys %{$reads{$pid}{errors}}) {
+           my $errors = $reads{$pid}{errors}{$err};
+           printf("%6d  %-20s  %6d  %10s\n", $pid, $comm, $err, $errors);
+       }
+    }
+
+    printf("\nwrite counts by pid:\n\n");
+
+    printf("%6s  %20s  %10s  %10s\n", "pid", "comm",
+          "# writes", "bytes_written");
+    printf("%6s  %-20s  %10s  %10s\n", "------", "--------------------",
+          "-----------", "----------");
+
+    foreach my $pid (sort {($writes{$b}{bytes_written} || 0) <=>
+                              ($writes{$a}{bytes_written} || 0)} keys %writes) {
+       my $comm = $writes{$pid}{comm} || "";
+       my $total_writes = $writes{$pid}{total_writes} || 0;
+       my $bytes_written = $writes{$pid}{bytes_written} || 0;
+
+       printf("%6s  %-20s  %10s  %10s\n", $pid, $comm,
+              $total_writes, $bytes_written);
+    }
+
+    printf("\nfailed writes by pid:\n\n");
+
+    printf("%6s  %20s  %6s  %10s\n", "pid", "comm", "error #", "# errors");
+    printf("%6s  %20s  %6s  %10s\n", "------", "--------------------",
+          "------", "----------");
+
+    foreach my $pid (keys %writes) {
+       my $comm = $writes{$pid}{comm} || "";
+       foreach my $err (sort {$writes{$pid}{errors}{$b} <=>
+                                  $writes{$pid}{errors}{$a}}
+                        keys %{$writes{$pid}{errors}}) {
+           my $errors = $writes{$pid}{errors}{$err};
+           printf("%6d  %-20s  %6d  %10s\n", $pid, $comm, $err, $errors);
+       }
+    }
+
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+       return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+          "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+       printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl
new file mode 100644 (file)
index 0000000..ed58ef2
--- /dev/null
@@ -0,0 +1,103 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display avg/min/max wakeup latency
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my %last_wakeup;
+
+my $max_wakeup_latency;
+my $min_wakeup_latency;
+my $total_wakeup_latency;
+my $total_wakeups;
+
+sub sched::sched_switch
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid,
+       $next_prio) = @_;
+
+    my $wakeup_ts = $last_wakeup{$common_cpu}{ts};
+    if ($wakeup_ts) {
+       my $switch_ts = nsecs($common_secs, $common_nsecs);
+       my $wakeup_latency = $switch_ts - $wakeup_ts;
+       if ($wakeup_latency > $max_wakeup_latency) {
+           $max_wakeup_latency = $wakeup_latency;
+       }
+       if ($wakeup_latency < $min_wakeup_latency) {
+           $min_wakeup_latency = $wakeup_latency;
+       }
+       $total_wakeup_latency += $wakeup_latency;
+       $total_wakeups++;
+    }
+    $last_wakeup{$common_cpu}{ts} = 0;
+}
+
+sub sched::sched_wakeup
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $comm, $pid, $prio, $success, $target_cpu) = @_;
+
+    $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs);
+}
+
+sub trace_begin
+{
+    $min_wakeup_latency = 1000000000;
+    $max_wakeup_latency = 0;
+}
+
+sub trace_end
+{
+    my $wakeups = $total_wakeups || 0;   # undef if no wakeup/switch pair was seen
+    printf("wakeup_latency stats:\n\n");
+    print "total_wakeups: $wakeups\n";
+    printf("avg_wakeup_latency (ns): %u\n",
+          $wakeups ? avg($total_wakeup_latency, $wakeups) : 0);
+    printf("min_wakeup_latency (ns): %u\n", $min_wakeup_latency);
+    printf("max_wakeup_latency (ns): %u\n", $max_wakeup_latency);
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+       return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+          "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+       printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl
new file mode 100644 (file)
index 0000000..511302c
--- /dev/null
@@ -0,0 +1,129 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Displays workqueue stats
+#
+# Usage:
+#
+#   perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e
+#     workqueue:workqueue_destruction -e workqueue:workqueue_execution
+#     -e workqueue:workqueue_insertion
+#
+#   perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my @cpus;
+
+sub workqueue::workqueue_destruction
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $thread_comm, $thread_pid) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{destroyed}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_creation
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $thread_comm, $thread_pid, $cpu) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{created}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_execution
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $thread_comm, $thread_pid, $func) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{executed}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_insertion
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm,
+       $thread_comm, $thread_pid, $func) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{inserted}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub trace_end
+{
+    # Print per-cpu, per-worker-thread work counts, then lifecycle counts.
+    print "workqueue work stats:\n\n";
+    my $cpu = 0;
+    printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name");
+    printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----");
+    foreach my $pidhash (@cpus) {
+       while ((my $pid, my $wqhash) = each %$pidhash) {
+           my $ins = $$wqhash{'inserted'} || 0;   # never-seen counters are undef
+           my $exe = $$wqhash{'executed'} || 0;
+           my $comm = $$wqhash{'comm'} || "";
+           if ($ins || $exe) {
+               printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm);
+           }
+       }
+       $cpu++;
+    }
+    $cpu = 0;
+    print "\nworkqueue lifecycle stats:\n\n";
+    printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name");
+    printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----");
+    foreach my $pidhash (@cpus) {
+       while ((my $pid, my $wqhash) = each %$pidhash) {
+           my $created = $$wqhash{'created'} || 0;
+           my $destroyed = $$wqhash{'destroyed'} || 0;
+           my $comm = $$wqhash{'comm'} || "";
+           if ($created || $destroyed) {
+               printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed,
+                      $comm);
+           }
+       }
+       $cpu++;
+    }
+
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+       return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+          "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+       printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+       $common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
index 0b791bd346bc604df872553b8982f5bf137b8c86..35073621e5de9a83891b7e6e58e7469345e94b71 100644 (file)
@@ -29,3 +29,11 @@ unsigned char sane_ctype[256] = {
        A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0,         /* 112..127 */
        /* Nothing in the 128.. range */
 };
+
+const char *graph_line =
+       "_____________________________________________________________________"
+       "_____________________________________________________________________";
+const char *graph_dotted_line =
+       "---------------------------------------------------------------------"
+       "---------------------------------------------------------------------"
+       "---------------------------------------------------------------------";
index 14cb8465eb089817aa38b195c526c8b716fa344a..ca0bedf637c243135d40ffafc41562aa3a047c7d 100644 (file)
@@ -8,11 +8,9 @@ static struct perf_file_handler *curr_handler;
 static unsigned long   mmap_window = 32;
 static char            __cwd[PATH_MAX];
 
-static int
-process_event_stub(event_t *event __used,
-                  unsigned long offset __used,
-                  unsigned long head __used)
+static int process_event_stub(event_t *event __used)
 {
+       dump_printf(": unhandled!\n");
        return 0;
 }
 
@@ -40,30 +38,62 @@ void register_perf_file_handler(struct perf_file_handler *handler)
        curr_handler = handler;
 }
 
+static const char *event__name[] = {
+       [0]                      = "TOTAL",
+       [PERF_RECORD_MMAP]       = "MMAP",
+       [PERF_RECORD_LOST]       = "LOST",
+       [PERF_RECORD_COMM]       = "COMM",
+       [PERF_RECORD_EXIT]       = "EXIT",
+       [PERF_RECORD_THROTTLE]   = "THROTTLE",
+       [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
+       [PERF_RECORD_FORK]       = "FORK",
+       [PERF_RECORD_READ]       = "READ",
+       [PERF_RECORD_SAMPLE]     = "SAMPLE",
+};
+
+unsigned long event__total[PERF_RECORD_MAX];
+
+void event__print_totals(void)
+{
+       int i;  /* record type; slot 0 of both tables is the running total */
+       for (i = 0; i < PERF_RECORD_MAX; ++i)
+               pr_info("%10s events: %10lu\n", /* event__total[] is unsigned long */
+                       event__name[i], event__total[i]);
+}
+
 static int
 process_event(event_t *event, unsigned long offset, unsigned long head)
 {
        trace_event(event);
 
+       if (event->header.type < PERF_RECORD_MAX) {
+               dump_printf("%p [%p]: PERF_RECORD_%s",
+                           (void *)(offset + head),
+                           (void *)(long)(event->header.size),
+                           event__name[event->header.type]);
+               ++event__total[0];
+               ++event__total[event->header.type];
+       }
+
        switch (event->header.type) {
        case PERF_RECORD_SAMPLE:
-               return curr_handler->process_sample_event(event, offset, head);
+               return curr_handler->process_sample_event(event);
        case PERF_RECORD_MMAP:
-               return curr_handler->process_mmap_event(event, offset, head);
+               return curr_handler->process_mmap_event(event);
        case PERF_RECORD_COMM:
-               return curr_handler->process_comm_event(event, offset, head);
+               return curr_handler->process_comm_event(event);
        case PERF_RECORD_FORK:
-               return curr_handler->process_fork_event(event, offset, head);
+               return curr_handler->process_fork_event(event);
        case PERF_RECORD_EXIT:
-               return curr_handler->process_exit_event(event, offset, head);
+               return curr_handler->process_exit_event(event);
        case PERF_RECORD_LOST:
-               return curr_handler->process_lost_event(event, offset, head);
+               return curr_handler->process_lost_event(event);
        case PERF_RECORD_READ:
-               return curr_handler->process_read_event(event, offset, head);
+               return curr_handler->process_read_event(event);
        case PERF_RECORD_THROTTLE:
-               return curr_handler->process_throttle_event(event, offset, head);
+               return curr_handler->process_throttle_event(event);
        case PERF_RECORD_UNTHROTTLE:
-               return curr_handler->process_unthrottle_event(event, offset, head);
+               return curr_handler->process_unthrottle_event(event);
        default:
                curr_handler->total_unknown++;
                return -1;
@@ -106,7 +136,7 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
                            int *cwdlen,
                            char **cwd)
 {
-       int ret, rc = EXIT_FAILURE;
+       int err;
        struct perf_header *header;
        unsigned long head, shift;
        unsigned long offset = 0;
@@ -118,56 +148,63 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
        int input;
        char *buf;
 
-       if (!curr_handler)
-               die("Forgot to register perf file handler");
+       if (curr_handler == NULL) {
+               pr_debug("Forgot to register perf file handler\n");
+               return -EINVAL;
+       }
 
        page_size = getpagesize();
 
        input = open(input_name, O_RDONLY);
        if (input < 0) {
-               fprintf(stderr, " failed to open file: %s", input_name);
+               pr_err("Failed to open file: %s", input_name);
                if (!strcmp(input_name, "perf.data"))
-                       fprintf(stderr, "  (try 'perf record' first)");
-               fprintf(stderr, "\n");
-               exit(-1);
+                       pr_err("  (try 'perf record' first)");
+               pr_err("\n");
+               return -errno;
        }
 
-       ret = fstat(input, &input_stat);
-       if (ret < 0) {
-               perror("failed to stat file");
-               exit(-1);
+       if (fstat(input, &input_stat) < 0) {
+               pr_err("failed to stat file");
+               err = -errno;
+               goto out_close;
        }
 
+       err = -EACCES;
        if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
-               fprintf(stderr, "file: %s not owned by current user or root\n",
+               pr_err("file: %s not owned by current user or root\n",
                        input_name);
-               exit(-1);
+               goto out_close;
        }
 
-       if (!input_stat.st_size) {
-               fprintf(stderr, "zero-sized file, nothing to do!\n");
-               exit(0);
+       if (input_stat.st_size == 0) {
+               pr_info("zero-sized file, nothing to do!\n");
+               goto done;
        }
 
-       *pheader = perf_header__read(input);
-       header = *pheader;
+       err = -ENOMEM;
+       header = perf_header__new();
+       if (header == NULL)
+               goto out_close;
+
+       err = perf_header__read(header, input);
+       if (err < 0)
+               goto out_delete;
+       *pheader = header;
        head = header->data_offset;
 
        sample_type = perf_header__sample_type(header);
 
-       if (curr_handler->sample_type_check)
-               if (curr_handler->sample_type_check(sample_type) < 0)
-                       exit(-1);
-
-       if (load_kernel(NULL) < 0) {
-               perror("failed to load kernel symbols");
-               return EXIT_FAILURE;
-       }
+       err = -EINVAL;
+       if (curr_handler->sample_type_check &&
+           curr_handler->sample_type_check(sample_type) < 0)
+               goto out_delete;
 
        if (!full_paths) {
                if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
-                       perror("failed to get the current directory");
-                       return EXIT_FAILURE;
+                       pr_err("failed to get the current directory\n");
+                       err = -errno;
+                       goto out_delete;
                }
                *cwd = __cwd;
                *cwdlen = strlen(*cwd);
@@ -181,11 +218,12 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
        head -= shift;
 
 remap:
-       buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-                          MAP_SHARED, input, offset);
+       buf = mmap(NULL, page_size * mmap_window, PROT_READ,
+                  MAP_SHARED, input, offset);
        if (buf == MAP_FAILED) {
-               perror("failed to mmap file");
-               exit(-1);
+               pr_err("failed to mmap file\n");
+               err = -errno;
+               goto out_delete;
        }
 
 more:
@@ -242,10 +280,12 @@ more:
                goto more;
 
 done:
-       rc = EXIT_SUCCESS;
+       err = 0;
+out_close:
        close(input);
 
-       return rc;
+       return err;
+out_delete:
+       perf_header__delete(header);
+       goto out_close;
 }
-
-
index ae036ecd76254f993000f4847b1bc28a928f1fa1..3180ff7e3633b4dff29d167c3d2a7e23367d23fb 100644 (file)
@@ -4,7 +4,7 @@
 #include "event.h"
 #include "header.h"
 
-typedef int (*event_type_handler_t)(event_t *, unsigned long, unsigned long);
+typedef int (*event_type_handler_t)(event_t *);
 
 struct perf_file_handler {
        event_type_handler_t    process_sample_event;
index 1dae7e3b400da6622eb0d133068b717d9aa90bd4..414b89d1bde9ec8637ea0390bb29f20f1dd84ec3 100644 (file)
@@ -2,6 +2,7 @@
 #include "event.h"
 #include "debug.h"
 #include "string.h"
+#include "thread.h"
 
 static pid_t event__synthesize_comm(pid_t pid, int full,
                                    int (*process)(event_t *event))
@@ -175,3 +176,137 @@ void event__synthesize_threads(int (*process)(event_t *event))
 
        closedir(proc);
 }
+
+char *event__cwd;
+int  event__cwdlen;
+
+struct events_stats event__stats;
+
+int event__process_comm(event_t *self)
+{
+       struct thread *thread = threads__findnew(self->comm.pid);
+
+       dump_printf(": %s:%d\n", self->comm.comm, self->comm.pid);
+
+       if (thread == NULL || thread__set_comm(thread, self->comm.comm)) {
+               dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+int event__process_lost(event_t *self)
+{
+       dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
+       event__stats.lost += self->lost.lost;
+       return 0;
+}
+
+/*
+ * Handle a PERF_RECORD_MMAP event: build a MAP__FUNCTION map from it and
+ * attach that map to the thread identified by the event's pid.
+ *
+ * Always returns 0; a failure to find/create the thread or the map is only
+ * reported through dump_printf() and the event is skipped.
+ */
+int event__process_mmap(event_t *self)
+{
+       struct thread *thread = threads__findnew(self->mmap.pid);
+       struct map *map = map__new(&self->mmap, MAP__FUNCTION,
+                                  event__cwd, event__cwdlen);
+
+       dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n",
+                   self->mmap.pid, self->mmap.tid,
+                   (void *)(long)self->mmap.start,
+                   (void *)(long)self->mmap.len,
+                   (void *)(long)self->mmap.pgoff,
+                   self->mmap.filename);
+
+       if (thread == NULL || map == NULL) {
+               dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
+               /*
+                * The map was created before we knew whether the thread
+                * lookup worked; nobody else references it yet, so release
+                * it here instead of leaking it.
+                */
+               if (map != NULL)
+                       map__delete(map);
+       } else {
+               thread__insert_map(thread, map);
+       }
+
+       return 0;
+}
+
+int event__process_task(event_t *self)
+{
+       struct thread *thread = threads__findnew(self->fork.pid);
+       struct thread *parent = threads__findnew(self->fork.ppid);
+
+       dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid,
+                   self->fork.ppid, self->fork.ptid);
+       /*
+        * A thread clone will have the same PID for both parent and child.
+        */
+       if (thread == parent)
+               return 0;
+
+       if (self->header.type == PERF_RECORD_EXIT)
+               return 0;
+
+       if (thread == NULL || parent == NULL ||
+           thread__fork(thread, parent) < 0) {
+               dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Resolve @addr, sampled while running in @cpumode, to a map and a symbol.
+ *
+ * Fills @al: thread is always @self; level is 'k' (kernel), '.' (user) or
+ * 'H' (anything else, printed as "[hypervisor]" by
+ * event__preprocess_sample()); map and sym are NULL when nothing matches.
+ * On a map hit al->addr is rewritten through map->map_ip() before the
+ * symbol lookup, so it no longer holds the raw sampled address.
+ */
+void thread__find_addr_location(struct thread *self, u8 cpumode,
+                               enum map_type type, u64 addr,
+                               struct addr_location *al,
+                               symbol_filter_t filter)
+{
+       struct thread *thread = al->thread = self;
+
+       al->addr = addr;
+
+       /*
+        * The cpumode is an enumerated value under the cpumode mask, not a
+        * set of independent bits: the hypervisor value has the kernel bit
+        * set too, so test for equality or hypervisor samples end up
+        * classified as kernel ones.
+        */
+       if (cpumode == PERF_RECORD_MISC_KERNEL) {
+               al->level = 'k';
+               thread = kthread;
+       } else if (cpumode == PERF_RECORD_MISC_USER)
+               al->level = '.';
+       else {
+               al->level = 'H';
+               al->map = NULL;
+               al->sym = NULL;
+               return;
+       }
+try_again:
+       al->map = thread__find_map(thread, type, al->addr);
+       if (al->map == NULL) {
+               /*
+                * If this is outside of all known maps, and is a negative
+                * address, try to look it up in the kernel dso, as it might be
+                * a vsyscall or vdso (which executes in user-mode).
+                *
+                * XXX This is nasty, we should have a symbol list in the
+                * "[vdso]" dso, but for now lets use the old trick of looking
+                * in the whole kernel symbol list.
+                */
+               if ((long long)al->addr < 0 && thread != kthread) {
+                       thread = kthread;
+                       goto try_again;
+               }
+               al->sym = NULL;
+       } else {
+               al->addr = al->map->map_ip(al->map, al->addr);
+               al->sym = map__find_symbol(al->map, al->addr, filter);
+       }
+}
+
+int event__preprocess_sample(const event_t *self, struct addr_location *al,
+                            symbol_filter_t filter)
+{
+       u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+       struct thread *thread = threads__findnew(self->ip.pid);
+
+       if (thread == NULL)
+               return -1;
+
+       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+       thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+                                  self->ip.ip, al, filter);
+       dump_printf(" ...... dso: %s\n",
+                   al->map ? al->map->dso->long_name :
+                       al->level == 'H' ? "[hypervisor]" : "<not found>");
+       return 0;
+}
index 1f771ce3a95793c29abf14106e91408d86f319d6..a4cc8105cf675f42285896e674dd81193da7582b 100644 (file)
@@ -69,13 +69,6 @@ struct build_id_event {
        char                     filename[];
 };
 
-struct build_id_list {
-       struct build_id_event   event;
-       struct list_head        list;
-       const char              *dso_name;
-       int                     len;
-};
-
 typedef union event_union {
        struct perf_event_header        header;
        struct ip_event                 ip;
@@ -87,6 +80,19 @@ typedef union event_union {
        struct sample_event             sample;
 } event_t;
 
+struct events_stats {
+       unsigned long total;
+       unsigned long lost;
+};
+
+void event__print_totals(void);
+
+enum map_type {
+       MAP__FUNCTION = 0,
+
+       MAP__NR_TYPES,
+};
+
 struct map {
        union {
                struct rb_node  rb_node;
@@ -94,6 +100,7 @@ struct map {
        };
        u64                     start;
        u64                     end;
+       enum map_type           type;
        u64                     pgoff;
        u64                     (*map_ip)(struct map *, u64);
        u64                     (*unmap_ip)(struct map *, u64);
@@ -119,15 +126,34 @@ struct symbol;
 
 typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
 
-void map__init(struct map *self, u64 start, u64 end, u64 pgoff,
-              struct dso *dso);
-struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen);
+void map__init(struct map *self, enum map_type type,
+              u64 start, u64 end, u64 pgoff, struct dso *dso);
+struct map *map__new(struct mmap_event *event, enum map_type,
+                    char *cwd, int cwdlen);
+void map__delete(struct map *self);
 struct map *map__clone(struct map *self);
 int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *self, FILE *fp);
-struct symbol *map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter);
+struct symbol *map__find_symbol(struct map *self, u64 addr,
+                               symbol_filter_t filter);
+void map__fixup_start(struct map *self);
+void map__fixup_end(struct map *self);
 
 int event__synthesize_thread(pid_t pid, int (*process)(event_t *event));
 void event__synthesize_threads(int (*process)(event_t *event));
 
+extern char *event__cwd;
+extern int  event__cwdlen;
+extern struct events_stats event__stats;
+extern unsigned long event__total[PERF_RECORD_MAX];
+
+int event__process_comm(event_t *self);
+int event__process_lost(event_t *self);
+int event__process_mmap(event_t *self);
+int event__process_task(event_t *self);
+
+struct addr_location;
+int event__preprocess_sample(const event_t *self, struct addr_location *al,
+                            symbol_filter_t filter);
+
 #endif /* __PERF_RECORD_H */
index b01a9537977f41a57e7c61e803ff74bcdd58d4e9..4805e6dfd23c8a77f2ea7a7bf56bc15c8e5a4544 100644 (file)
@@ -63,7 +63,7 @@ int perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
  */
 struct perf_header *perf_header__new(void)
 {
-       struct perf_header *self = calloc(sizeof(*self), 1);
+       struct perf_header *self = zalloc(sizeof(*self));
 
        if (self != NULL) {
                self->size = 1;
@@ -78,16 +78,24 @@ struct perf_header *perf_header__new(void)
        return self;
 }
 
+void perf_header__delete(struct perf_header *self)
+{
+       int i;
+
+       for (i = 0; i < self->attrs; ++i)
+               perf_header_attr__delete(self->attr[i]);
+
+       free(self->attr);
+       free(self);
+}
+
 int perf_header__add_attr(struct perf_header *self,
                          struct perf_header_attr *attr)
 {
-       int pos = self->attrs;
-
        if (self->frozen)
                return -1;
 
-       self->attrs++;
-       if (self->attrs > self->size) {
+       if (self->attrs == self->size) {
                int nsize = self->size * 2;
                struct perf_header_attr **nattr;
 
@@ -98,7 +106,8 @@ int perf_header__add_attr(struct perf_header *self,
                self->size = nsize;
                self->attr = nattr;
        }
-       self->attr[pos] = attr;
+
+       self->attr[self->attrs++] = attr;
        return 0;
 }
 
@@ -167,7 +176,7 @@ static int do_write(int fd, const void *buf, size_t size)
                int ret = write(fd, buf, size);
 
                if (ret < 0)
-                       return -1;
+                       return -errno;
 
                size -= ret;
                buf += ret;
@@ -176,43 +185,59 @@ static int do_write(int fd, const void *buf, size_t size)
        return 0;
 }
 
-static int write_buildid_table(int fd, struct list_head *id_head)
+static int __dsos__write_buildid_table(struct list_head *head, int fd)
 {
-       struct build_id_list *iter, *next;
-
-       list_for_each_entry_safe(iter, next, id_head, list) {
-               struct build_id_event *b = &iter->event;
-
-               if (do_write(fd, b, sizeof(*b)) < 0 ||
-                   do_write(fd, iter->dso_name, iter->len) < 0)
-                       return -1;
-               list_del(&iter->list);
-               free(iter);
+       struct dso *pos;
+       /* Zero source for the name padding: at most 63 bytes, see ALIGN below */
+       static const char zero_buf[64];
+
+       list_for_each_entry(pos, head, node) {
+               int err;
+               struct build_id_event b;
+               size_t name_len, len;
+
+               if (!pos->has_build_id)
+                       continue;
+               name_len = pos->long_name_len + 1;
+               len = ALIGN(name_len, 64);
+               memset(&b, 0, sizeof(b));
+               memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
+               b.header.size = sizeof(b) + len;
+               err = do_write(fd, &b, sizeof(b));
+               if (err < 0)
+                       return err;
+               /*
+                * Only name_len bytes are taken from long_name; writing the
+                * aligned length straight from it would read past the end of
+                * the string and leak whatever follows it into the file.
+                * The record is padded with explicit zeros instead, so its
+                * on-disk size still matches b.header.size.
+                */
+               err = do_write(fd, pos->long_name, name_len);
+               if (err < 0)
+                       return err;
+               if (len > name_len) {
+                       err = do_write(fd, zero_buf, len - name_len);
+                       if (err < 0)
+                               return err;
+               }
        }
 
        return 0;
 }
 
-static void
-perf_header__adds_write(struct perf_header *self, int fd)
+static int dsos__write_buildid_table(int fd)
+{
+       int err = __dsos__write_buildid_table(&dsos__kernel, fd);
+       if (err == 0)
+               err = __dsos__write_buildid_table(&dsos__user, fd);
+       return err;
+}
+
+static int perf_header__adds_write(struct perf_header *self, int fd)
 {
-       LIST_HEAD(id_list);
        int nr_sections;
        struct perf_file_section *feat_sec;
        int sec_size;
        u64 sec_start;
-       int idx = 0;
+       int idx = 0, err;
 
-       if (fetch_build_id_table(&id_list))
+       if (dsos__read_build_ids())
                perf_header__set_feat(self, HEADER_BUILD_ID);
 
        nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
        if (!nr_sections)
-               return;
+               return 0;
 
        feat_sec = calloc(sizeof(*feat_sec), nr_sections);
-       if (!feat_sec)
-               die("No memory");
+       if (feat_sec == NULL)
+               return -ENOMEM;
 
        sec_size = sizeof(*feat_sec) * nr_sections;
 
@@ -238,23 +263,29 @@ perf_header__adds_write(struct perf_header *self, int fd)
 
                /* Write build-ids */
                buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
-               if (write_buildid_table(fd, &id_list) < 0)
-                       die("failed to write buildid table");
+               err = dsos__write_buildid_table(fd);
+               if (err < 0) {
+                       pr_debug("failed to write buildid table\n");
+                       goto out_free;
+               }
                buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset;
        }
 
        lseek(fd, sec_start, SEEK_SET);
-       if (do_write(fd, feat_sec, sec_size) < 0)
-               die("failed to write feature section");
+       err = do_write(fd, feat_sec, sec_size);
+       if (err < 0)
+               pr_debug("failed to write feature section\n");
+out_free:
        free(feat_sec);
+       return err;
 }
 
-void perf_header__write(struct perf_header *self, int fd, bool at_exit)
+int perf_header__write(struct perf_header *self, int fd, bool at_exit)
 {
        struct perf_file_header f_header;
        struct perf_file_attr   f_attr;
        struct perf_header_attr *attr;
-       int i;
+       int i, err;
 
        lseek(fd, sizeof(f_header), SEEK_SET);
 
@@ -263,8 +294,11 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit)
                attr = self->attr[i];
 
                attr->id_offset = lseek(fd, 0, SEEK_CUR);
-               if (do_write(fd, attr->id, attr->ids * sizeof(u64)) < 0)
-                       die("failed to write perf header");
+               err = do_write(fd, attr->id, attr->ids * sizeof(u64));
+               if (err < 0) {
+                       pr_debug("failed to write perf header\n");
+                       return err;
+               }
        }
 
 
@@ -280,20 +314,30 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit)
                                .size   = attr->ids * sizeof(u64),
                        }
                };
-               if (do_write(fd, &f_attr, sizeof(f_attr)) < 0)
-                       die("failed to write perf header attribute");
+               err = do_write(fd, &f_attr, sizeof(f_attr));
+               if (err < 0) {
+                       pr_debug("failed to write perf header attribute\n");
+                       return err;
+               }
        }
 
        self->event_offset = lseek(fd, 0, SEEK_CUR);
        self->event_size = event_count * sizeof(struct perf_trace_event_type);
-       if (events)
-               if (do_write(fd, events, self->event_size) < 0)
-                       die("failed to write perf header events");
+       if (events) {
+               err = do_write(fd, events, self->event_size);
+               if (err < 0) {
+                       pr_debug("failed to write perf header events\n");
+                       return err;
+               }
+       }
 
        self->data_offset = lseek(fd, 0, SEEK_CUR);
 
-       if (at_exit)
-               perf_header__adds_write(self, fd);
+       if (at_exit) {
+               err = perf_header__adds_write(self, fd);
+               if (err < 0)
+                       return err;
+       }
 
        f_header = (struct perf_file_header){
                .magic     = PERF_MAGIC,
@@ -316,11 +360,15 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit)
        memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features));
 
        lseek(fd, 0, SEEK_SET);
-       if (do_write(fd, &f_header, sizeof(f_header)) < 0)
-               die("failed to write perf header");
+       err = do_write(fd, &f_header, sizeof(f_header));
+       if (err < 0) {
+               pr_debug("failed to write perf header\n");
+               return err;
+       }
        lseek(fd, self->data_offset + self->data_size, SEEK_SET);
 
        self->frozen = 1;
+       return 0;
 }
 
 static void do_read(int fd, void *buf, size_t size)
@@ -430,19 +478,17 @@ static int perf_file_section__process(struct perf_file_section *self,
        return 0;
 }
 
-struct perf_header *perf_header__read(int fd)
+int perf_header__read(struct perf_header *self, int fd)
 {
-       struct perf_header      *self = perf_header__new();
        struct perf_file_header f_header;
        struct perf_file_attr   f_attr;
        u64                     f_id;
        int nr_attrs, nr_ids, i, j;
 
-       if (self == NULL)
-               die("nomem");
-
-       if (perf_file_header__read(&f_header, self, fd) < 0)
-               die("incompatible file format");
+       if (perf_file_header__read(&f_header, self, fd) < 0) {
+               pr_debug("incompatible file format\n");
+               return -EINVAL;
+       }
 
        nr_attrs = f_header.attrs.size / sizeof(f_attr);
        lseek(fd, f_header.attrs.offset, SEEK_SET);
@@ -456,7 +502,7 @@ struct perf_header *perf_header__read(int fd)
 
                attr = perf_header_attr__new(&f_attr.attr);
                if (attr == NULL)
-                        die("nomem");
+                        return -ENOMEM;
 
                nr_ids = f_attr.ids.size / sizeof(u64);
                lseek(fd, f_attr.ids.offset, SEEK_SET);
@@ -464,11 +510,15 @@ struct perf_header *perf_header__read(int fd)
                for (j = 0; j < nr_ids; j++) {
                        do_read(fd, &f_id, sizeof(f_id));
 
-                       if (perf_header_attr__add_id(attr, f_id) < 0)
-                               die("nomem");
+                       if (perf_header_attr__add_id(attr, f_id) < 0) {
+                               perf_header_attr__delete(attr);
+                               return -ENOMEM;
+                       }
+               }
+               if (perf_header__add_attr(self, attr) < 0) {
+                       perf_header_attr__delete(attr);
+                       return -ENOMEM;
                }
-               if (perf_header__add_attr(self, attr) < 0)
-                        die("nomem");
 
                lseek(fd, tmp, SEEK_SET);
        }
@@ -476,8 +526,8 @@ struct perf_header *perf_header__read(int fd)
        if (f_header.event_types.size) {
                lseek(fd, f_header.event_types.offset, SEEK_SET);
                events = malloc(f_header.event_types.size);
-               if (!events)
-                       die("nomem");
+               if (events == NULL)
+                       return -ENOMEM;
                do_read(fd, events, f_header.event_types.size);
                event_count =  f_header.event_types.size / sizeof(struct perf_trace_event_type);
        }
@@ -487,8 +537,7 @@ struct perf_header *perf_header__read(int fd)
        lseek(fd, self->data_offset, SEEK_SET);
 
        self->frozen = 1;
-
-       return self;
+       return 0;
 }
 
 u64 perf_header__sample_type(struct perf_header *header)
index f46a94e09eea9f603b10d2769244a136b525f1a3..d1dbe2b79c42bfcbf9a3e90e965076b6a5afc075 100644 (file)
@@ -55,8 +55,11 @@ struct perf_header {
        DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
 };
 
-struct perf_header *perf_header__read(int fd);
-void perf_header__write(struct perf_header *self, int fd, bool at_exit);
+struct perf_header *perf_header__new(void);
+void perf_header__delete(struct perf_header *self);
+
+int perf_header__read(struct perf_header *self, int fd);
+int perf_header__write(struct perf_header *self, int fd, bool at_exit);
 
 int perf_header__add_attr(struct perf_header *self,
                          struct perf_header_attr *attr);
@@ -75,8 +78,6 @@ perf_header__find_attr(u64 id, struct perf_header *header);
 void perf_header__set_feat(struct perf_header *self, int feat);
 bool perf_header__has_feat(const struct perf_header *self, int feat);
 
-struct perf_header *perf_header__new(void);
-
 int perf_header__process_sections(struct perf_header *self, int fd,
                                  int (*process)(struct perf_file_section *self,
                                                 int feat, int fd));
index 7393a02fd8d470fe9ac50340b41e61fe7564b804..0ebf6ee16caa4f4714aedce18ddb77eb1592962e 100644 (file)
@@ -10,31 +10,23 @@ struct callchain_param      callchain_param = {
        .min_percent = 0.5
 };
 
-unsigned long total;
-unsigned long total_mmap;
-unsigned long total_comm;
-unsigned long total_fork;
-unsigned long total_unknown;
-unsigned long total_lost;
-
 /*
  * histogram, sorted on item, collects counts
  */
 
-struct hist_entry *__hist_entry__add(struct thread *thread, struct map *map,
-                                    struct symbol *sym,
+struct hist_entry *__hist_entry__add(struct addr_location *al,
                                     struct symbol *sym_parent,
-                                    u64 ip, u64 count, char level, bool *hit)
+                                    u64 count, bool *hit)
 {
        struct rb_node **p = &hist.rb_node;
        struct rb_node *parent = NULL;
        struct hist_entry *he;
        struct hist_entry entry = {
-               .thread = thread,
-               .map    = map,
-               .sym    = sym,
-               .ip     = ip,
-               .level  = level,
+               .thread = al->thread,
+               .map    = al->map,
+               .sym    = al->sym,
+               .ip     = al->addr,
+               .level  = al->level,
                .count  = count,
                .parent = sym_parent,
        };
index ac2149c559b0c0cb10cc02a3ab95b9f93c2b9a39..3020db0c92927354168c129c63bbf5d474ca793f 100644 (file)
@@ -36,9 +36,9 @@ extern unsigned long total_fork;
 extern unsigned long total_unknown;
 extern unsigned long total_lost;
 
-struct hist_entry *__hist_entry__add(struct thread *thread, struct map *map,
-                                    struct symbol *sym, struct symbol *parent,
-                                    u64 ip, u64 count, char level, bool *hit);
+struct hist_entry *__hist_entry__add(struct addr_location *al,
+                                    struct symbol *parent,
+                                    u64 count, bool *hit);
 extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
 extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
 extern void hist_entry__free(struct hist_entry *);
diff --git a/tools/perf/util/include/asm/bug.h b/tools/perf/util/include/asm/bug.h
new file mode 100644 (file)
index 0000000..7fcc681
--- /dev/null
@@ -0,0 +1,22 @@
+#ifndef _PERF_ASM_GENERIC_BUG_H
+#define _PERF_ASM_GENERIC_BUG_H
+
+#define __WARN_printf(arg...)  do { fprintf(stderr, arg); } while (0)
+
+#define WARN(condition, format...) ({          \
+       int __ret_warn_on = !!(condition);      \
+       if (unlikely(__ret_warn_on))            \
+               __WARN_printf(format);          \
+       unlikely(__ret_warn_on);                \
+})
+
+#define WARN_ONCE(condition, format...)        ({      \
+       static int __warned;                    \
+       int __ret_warn_once = !!(condition);    \
+                                               \
+       if (unlikely(__ret_warn_once))          \
+               if (WARN(!__warned, format))    \
+                       __warned = 1;           \
+       unlikely(__ret_warn_once);              \
+})
+#endif
index ace57c36d1d0666e0eeac5e6d732c4dfbe5b9f33..8d63116e9435be917354b94b42ef9e6c9d026caf 100644 (file)
@@ -7,6 +7,8 @@
 #define CONFIG_GENERIC_FIND_FIRST_BIT
 #include "../../../../include/linux/bitops.h"
 
+#undef __KERNEL__
+
 static inline void set_bit(int nr, unsigned long *addr)
 {
        addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
index 94ca95073c40f594346bb552cd67f8ee43ecd435..69f94fe9db20a059e4e00f687bd109a543c7d0e0 100644 (file)
@@ -20,9 +20,10 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen)
        return n;
 }
 
-void map__init(struct map *self, u64 start, u64 end, u64 pgoff,
-              struct dso *dso)
+void map__init(struct map *self, enum map_type type,
+              u64 start, u64 end, u64 pgoff, struct dso *dso)
 {
+       self->type     = type;
        self->start    = start;
        self->end      = end;
        self->pgoff    = pgoff;
@@ -32,7 +33,8 @@ void map__init(struct map *self, u64 start, u64 end, u64 pgoff,
        RB_CLEAR_NODE(&self->rb_node);
 }
 
-struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen)
+struct map *map__new(struct mmap_event *event, enum map_type type,
+                    char *cwd, int cwdlen)
 {
        struct map *self = malloc(sizeof(*self));
 
@@ -63,7 +65,7 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen)
                if (dso == NULL)
                        goto out_delete;
 
-               map__init(self, event->start, event->start + event->len,
+               map__init(self, type, event->start, event->start + event->len,
                          event->pgoff, dso);
 
                if (self->dso == vdso || anon)
@@ -75,12 +77,37 @@ out_delete:
        return NULL;
 }
 
+void map__delete(struct map *self)
+{
+       free(self);
+}
+
+void map__fixup_start(struct map *self)
+{
+       struct rb_root *symbols = &self->dso->symbols[self->type];
+       struct rb_node *nd = rb_first(symbols);
+       if (nd != NULL) {
+               struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+               self->start = sym->start;
+       }
+}
+
+void map__fixup_end(struct map *self)
+{
+       struct rb_root *symbols = &self->dso->symbols[self->type];
+       struct rb_node *nd = rb_last(symbols);
+       if (nd != NULL) {
+               struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+               self->end = sym->end;
+       }
+}
+
 #define DSO__DELETED "(deleted)"
 
-struct symbol *
-map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter)
+struct symbol *map__find_symbol(struct map *self, u64 addr,
+                               symbol_filter_t filter)
 {
-       if (!self->dso->loaded) {
+       if (!dso__loaded(self->dso, self->type)) {
                int nr = dso__load(self->dso, self, filter);
 
                if (nr < 0) {
@@ -113,7 +140,7 @@ map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter)
                }
        }
 
-       return self->dso->find_symbol(self->dso, ip);
+       return self->dso->find_symbol(self->dso, self->type, addr);
 }
 
 struct map *map__clone(struct map *self)
index 0faf4f2bb5caaa2c39177009fb26fbf69873bdfa..9e5dbd66d34d7aa95aad038d2168eaed5ef920b3 100644 (file)
@@ -1,4 +1,4 @@
-
+#include "../../../include/linux/hw_breakpoint.h"
 #include "util.h"
 #include "../perf.h"
 #include "parse-options.h"
@@ -197,7 +197,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
                        if (id == config) {
                                closedir(evt_dir);
                                closedir(sys_dir);
-                               path = calloc(1, sizeof(path));
+                               path = zalloc(sizeof(*path)); /* size of the struct, not of the pointer */
                                path->system = malloc(MAX_EVENT_LENGTH);
                                if (!path->system) {
                                        free(path);
@@ -540,6 +540,81 @@ static enum event_result parse_tracepoint_event(const char **strp,
                                                     attr, strp);
 }
 
+/*
+ * Translate up to three access letters ('r', 'w', 'x') starting at @type
+ * into HW_BREAKPOINT_* bits that are OR-ed into attr->bp_type.  Scanning
+ * stops at the terminating NUL or after three letters; any other character
+ * makes the function return EVT_FAILED.  If no bit is set afterwards,
+ * read/write is used.  On success *strp points just past the consumed
+ * letters and EVT_HANDLED is returned.
+ */
+static enum event_result
+parse_breakpoint_type(const char *type, const char **strp,
+                     struct perf_event_attr *attr)
+{
+       int nr = 0;
+
+       while (nr < 3 && type[nr] != '\0') {
+               const char letter = type[nr];
+
+               if (letter == 'r')
+                       attr->bp_type |= HW_BREAKPOINT_R;
+               else if (letter == 'w')
+                       attr->bp_type |= HW_BREAKPOINT_W;
+               else if (letter == 'x')
+                       attr->bp_type |= HW_BREAKPOINT_X;
+               else
+                       return EVT_FAILED;
+
+               nr++;
+       }
+
+       if (attr->bp_type == 0) /* Default */
+               attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+
+       *strp = type + nr;
+
+       return EVT_HANDLED;
+}
+
+/*
+ * Parse a hardware breakpoint event of the form "mem:<addr>[:access]".
+ *
+ * @strp: in/out cursor into the event string; on success it is advanced
+ *        past the address and, when present, past the access letters.
+ * @attr: receives bp_addr, bp_type, bp_len and type.
+ *
+ * Returns EVT_HANDLED on success and EVT_FAILED when the text does not
+ * describe a breakpoint (no ':', wrong prefix, no address, bad access
+ * letter).
+ */
+static enum event_result
+parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
+{
+       const char *target;
+       char *endaddr;
+       u64 addr;
+
+       target = strchr(*strp, ':');
+       if (!target)
+               return EVT_FAILED;
+
+       /*
+        * The text before ':' has to be a non-empty prefix of "mem".  With
+        * an empty prefix strncmp() compares zero bytes and always matches,
+        * which used to accept strings such as ":0x1000".
+        */
+       if (target == *strp ||
+           strncmp(*strp, "mem", target - *strp) != 0)
+               return EVT_FAILED;
+
+       target++;
+
+       addr = strtoull(target, &endaddr, 0);
+       if (target == endaddr)
+               return EVT_FAILED;
+
+       attr->bp_addr = addr;
+       *strp = endaddr;
+
+       /*
+        * Only a ':' placed directly after the address introduces the access
+        * type.  Searching the remainder of the string for ':' would pick up
+        * a colon that belongs to whatever follows this event.
+        */
+       if (*endaddr != ':') {
+               /* If no type is defined, just rw as default */
+               attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+       } else if (parse_breakpoint_type(endaddr + 1, strp, attr) ==
+                  EVT_FAILED) {
+               return EVT_FAILED;
+       }
+
+       /* We should find a nice way to override the access type */
+       attr->bp_len = HW_BREAKPOINT_LEN_4;
+       attr->type = PERF_TYPE_BREAKPOINT;
+
+       return EVT_HANDLED;
+}
+
 static int check_events(const char *str, unsigned int i)
 {
        int n;
@@ -673,6 +748,10 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr)
        if (ret != EVT_FAILED)
                goto modifier;
 
+       ret = parse_breakpoint_event(str, attr);
+       if (ret != EVT_FAILED)
+               goto modifier;
+
        fprintf(stderr, "invalid or unsupported event: '%s'\n", *str);
        fprintf(stderr, "Run 'perf list' for a list of valid events\n");
        return EVT_FAILED;
@@ -859,6 +938,9 @@ void print_events(void)
                "rNNN");
        printf("\n");
 
+       printf("  %-42s [hardware breakpoint]\n", "mem:<addr>[:access]");
+       printf("\n");
+
        print_tracepoint_events();
 
        exit(129);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
new file mode 100644 (file)
index 0000000..cd7fbda
--- /dev/null
@@ -0,0 +1,484 @@
+/*
+ * probe-event.c : perf-probe definition to kprobe_events format converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <limits.h>
+
+#undef _GNU_SOURCE
+#include "event.h"
+#include "string.h"
+#include "strlist.h"
+#include "debug.h"
+#include "parse-events.h"  /* For debugfs_path */
+#include "probe-event.h"
+
+#define MAX_CMDLEN 256
+#define MAX_PROBE_ARGS 128
+#define PERFPROBE_GROUP "probe"
+
+#define semantic_error(msg ...) die("Semantic error :" msg)
+
+/*
+ * snprintf() wrapper: formats into @str (at most @size bytes) and returns
+ * the vsnprintf() result, except that a result of @size or more, i.e. the
+ * output did not fit, is reported as -E2BIG.
+ */
+static int e_snprintf(char *str, size_t size, const char *format, ...)
+{
+       va_list args;
+       int written;
+
+       va_start(args, format);
+       written = vsnprintf(str, size, format, args);
+       va_end(args);
+
+       return written >= (int)size ? -E2BIG : written;
+}
+
+/*
+ * Parse probepoint definition.
+ *
+ * @arg is split in place: every delimiter found by strpbrk() (':', '+',
+ * '@', '%') is overwritten with NUL.  The first token is copied into
+ * pp->file when it contains a '.', otherwise into pp->function; the
+ * remaining tokens fill pp->line, pp->offset, pp->file or pp->retprobe
+ * depending on the delimiter in front of them.  Invalid or contradictory
+ * input is reported through semantic_error() (a die() wrapper defined in
+ * this file) or DIE_IF().
+ */
+static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp)
+{
+       char *ptr, *tmp;
+       char c, nc = 0; /* c: delimiter before the current token, nc: before the next one */
+       /*
+        * <Syntax>
+        * perf probe SRC:LN
+        * perf probe FUNC[+OFFS|%return][@SRC]
+        */
+
+       ptr = strpbrk(arg, ":+@%");
+       if (ptr) {
+               nc = *ptr;
+               *ptr++ = '\0';
+       }
+
+       /*
+        * Check arg is function or file and copy it: a name containing '.'
+        * is treated as a source file, anything else as a function.
+        */
+       if (strchr(arg, '.'))   /* File */
+               pp->file = strdup(arg);
+       else                    /* Function */
+               pp->function = strdup(arg);
+       DIE_IF(pp->file == NULL && pp->function == NULL);
+
+       /*
+        * Parse other options.  Each pass handles the token that starts at
+        * ptr; its kind is given by the delimiter remembered in nc during
+        * the previous pass.
+        */
+       while (ptr) {
+               arg = ptr;
+               c = nc;
+               ptr = strpbrk(arg, ":+@%");
+               if (ptr) {
+                       nc = *ptr;
+                       *ptr++ = '\0';
+               }
+               switch (c) {
+               case ':':       /* Line number */
+                       pp->line = strtoul(arg, &tmp, 0);
+                       if (*tmp != '\0')
+                               semantic_error("There is non-digit charactor"
+                                               " in line number.");
+                       break;
+               case '+':       /* Byte offset from a symbol */
+                       pp->offset = strtoul(arg, &tmp, 0);
+                       if (*tmp != '\0')
+                               semantic_error("There is non-digit charactor"
+                                               " in offset.");
+                       break;
+               case '@':       /* File name */
+                       if (pp->file)
+                               semantic_error("SRC@SRC is not allowed.");
+                       pp->file = strdup(arg);
+                       DIE_IF(pp->file == NULL);
+                       if (ptr)
+                               semantic_error("@SRC must be the last "
+                                              "option.");
+                       break;
+               case '%':       /* Probe places */
+                       if (strcmp(arg, "return") == 0) {
+                               pp->retprobe = 1;
+                       } else  /* Others not supported yet */
+                               semantic_error("%%%s is not supported.", arg);
+                       break;
+               default:
+                       DIE_IF("Program has a bug.");
+                       break;
+               }
+       }
+
+       /* Exclusion check: reject combinations that contradict each other */
+       if (pp->line && pp->offset)
+               semantic_error("Offset can't be used with line number.");
+
+       if (!pp->line && pp->file && !pp->function)
+               semantic_error("File always requires line number.");
+
+       if (pp->offset && !pp->function)
+               semantic_error("Offset requires an entry function.");
+
+       if (pp->retprobe && !pp->function)
+               semantic_error("Return probe requires an entry function.");
+
+       if ((pp->offset || pp->line) && pp->retprobe)
+               semantic_error("Offset/Line can't be used with return probe.");
+
+       /* NOTE(review): pp->function or pp->file may still be NULL here and is passed to %s */
+       pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n",
+                pp->function, pp->file, pp->line, pp->offset, pp->retprobe);
+}
+
+/*
+ * Parse perf-probe event definition.
+ *
+ * @str is split at whitespace: the first word is the probe point (handed to
+ * parse_perf_probe_probepoint()), the remaining words are copied into
+ * pp->args / pp->nr_args.
+ *
+ * Returns 1 when the definition names a file or line, or has an argument for
+ * which is_c_varname() is true (need_dwarf); 0 otherwise.  Malformed input
+ * and allocation failures end in die()/semantic_error().
+ */
+int parse_perf_probe_event(const char *str, struct probe_point *pp)
+{
+       char **argv;
+       int argc, i, need_dwarf = 0;
+
+       argv = argv_split(str, &argc);
+       if (!argv)
+               die("argv_split failed.");
+       /* An empty definition yields argv[0] == NULL; do not dereference it */
+       if (argc == 0)
+               semantic_error("Probe point is not specified.");
+       if (argc > MAX_PROBE_ARGS + 1)
+               semantic_error("Too many arguments");
+
+       /* Parse probe point */
+       parse_perf_probe_probepoint(argv[0], pp);
+       if (pp->file || pp->line)
+               need_dwarf = 1;
+
+       /* Copy arguments and ensure return probe has no C argument */
+       pp->nr_args = argc - 1;
+       pp->args = zalloc(sizeof(char *) * pp->nr_args);
+       /* A NULL result is only an error when there is something to store */
+       if (pp->nr_args > 0 && !pp->args)
+               die("Failed to allocate memory for arguments.");
+       for (i = 0; i < pp->nr_args; i++) {
+               pp->args[i] = strdup(argv[i + 1]);
+               if (!pp->args[i])
+                       die("Failed to copy argument.");
+               if (is_c_varname(pp->args[i])) {
+                       if (pp->retprobe)
+                               semantic_error("You can't specify local"
+                                               " variable for kretprobe");
+                       need_dwarf = 1;
+               }
+       }
+
+       argv_free(argv);
+       return need_dwarf;
+}
+
+/*
+ * Parse kprobe_events event into struct probe_point.
+ *
+ * @str is expected to look like
+ *   "<c>:<group>/<event> <function>[+<offset>] [<arg>[=...]]..."
+ * *group and *event receive strings allocated by sscanf()'s "%a" conversion;
+ * the callers in this file release them with free().  When @pp is NULL only
+ * the group and event names are extracted.  Otherwise @pp gets the function
+ * name, offset, retprobe flag (set when <c> is 'r') and a copy of every
+ * argument name with its "=..." part cut off.
+ *
+ * Malformed lines and allocation failures end in die()/semantic_error().
+ */
+void parse_trace_kprobe_event(const char *str, char **group, char **event,
+                             struct probe_point *pp)
+{
+       char pr;
+       char *p;
+       int ret, i, argc;
+       char **argv;
+
+       pr_debug("Parsing kprobe_events: %s\n", str);
+       argv = argv_split(str, &argc);
+       if (!argv)
+               die("argv_split failed.");
+       if (argc < 2)
+               semantic_error("Too less arguments.");
+
+       /* Scan event and group name. */
+       ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]",
+                    &pr, (float *)(void *)group, (float *)(void *)event);
+       if (ret != 3)
+               semantic_error("Failed to parse event name: %s", argv[0]);
+       pr_debug("Group:%s Event:%s probe:%c\n", *group, *event, pr);
+
+       if (!pp)
+               goto end;
+
+       pp->retprobe = (pr == 'r');
+
+       /* Scan function name and offset */
+       ret = sscanf(argv[1], "%a[^+]+%d", (float *)(void *)&pp->function, &pp->offset);
+       /* Nothing converted: pp->function was never assigned, so stop here */
+       if (ret < 1)
+               semantic_error("Failed to parse probe point: %s", argv[1]);
+       if (ret == 1)
+               pp->offset = 0;
+
+       /* kprobe_events doesn't have this information */
+       pp->line = 0;
+       pp->file = NULL;
+
+       pp->nr_args = argc - 2;
+       pp->args = zalloc(sizeof(char *) * pp->nr_args);
+       /* A NULL result is only an error when there is something to store */
+       if (pp->nr_args > 0 && !pp->args)
+               die("Failed to allocate memory for arguments.");
+       for (i = 0; i < pp->nr_args; i++) {
+               p = strchr(argv[i + 2], '=');
+               if (p)  /* We don't need which register is assigned. */
+                       *p = '\0';
+               pp->args[i] = strdup(argv[i + 2]);
+               if (!pp->args[i])
+                       die("Failed to copy argument.");
+       }
+
+end:
+       argv_free(argv);
+}
+
+/*
+ * Build the perf-probe style definition for @pp, i.e.
+ * "FUNC[+OFFS][%return][:LINE] ARG..." or "FILE[:LINE] ARG...", in a newly
+ * allocated MAX_CMDLEN buffer stored in pp->probes[0].
+ *
+ * Returns pp->found (set to 1) on success.  On failure the buffer is freed,
+ * pp->probes[0] is reset to NULL and the non-positive e_snprintf() result is
+ * returned.  Dies when the buffer cannot be allocated.
+ */
+int synthesize_perf_probe_event(struct probe_point *pp)
+{
+       char *buf;
+       char offs[64] = "", line[64] = "";
+       int i, len, ret;
+
+       pp->probes[0] = buf = zalloc(MAX_CMDLEN);
+       if (!buf)
+               die("Failed to allocate memory by zalloc.");
+       if (pp->offset) {
+               ret = e_snprintf(offs, 64, "+%d", pp->offset);
+               if (ret <= 0)
+                       goto error;
+       }
+       if (pp->line) {
+               ret = e_snprintf(line, 64, ":%d", pp->line);
+               if (ret <= 0)
+                       goto error;
+       }
+
+       if (pp->function)
+               ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function,
+                                offs, pp->retprobe ? "%return" : "", line);
+       else
+               /* Two arguments, so exactly two conversions */
+               ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", pp->file, line);
+       if (ret <= 0)
+               goto error;
+       len = ret;
+
+       for (i = 0; i < pp->nr_args; i++) {
+               ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s",
+                                pp->args[i]);
+               if (ret <= 0)
+                       goto error;
+               len += ret;
+       }
+       pp->found = 1;
+
+       return pp->found;
+error:
+       free(pp->probes[0]);
+       /* Do not leave a dangling pointer behind for later cleanup code */
+       pp->probes[0] = NULL;
+
+       return ret;
+}
+
+/*
+ * Build "FUNC+OFFS ARG..." for @pp in a newly allocated MAX_CMDLEN buffer
+ * stored in pp->probes[0].  Returns pp->found (set to 1) on success; on a
+ * formatting failure the buffer is freed and the non-positive e_snprintf()
+ * result is returned.  Dies when the buffer cannot be allocated.
+ */
+int synthesize_trace_kprobe_event(struct probe_point *pp)
+{
+       char *cmd = zalloc(MAX_CMDLEN);
+       int used, n, idx;
+
+       pp->probes[0] = cmd;
+       if (cmd == NULL)
+               die("Failed to allocate memory by zalloc.");
+
+       n = e_snprintf(cmd, MAX_CMDLEN, "%s+%d", pp->function, pp->offset);
+       if (n <= 0)
+               goto error;
+       used = n;
+
+       idx = 0;
+       while (idx < pp->nr_args) {
+               n = e_snprintf(cmd + used, MAX_CMDLEN - used, " %s",
+                              pp->args[idx]);
+               if (n <= 0)
+                       goto error;
+               used += n;
+               idx++;
+       }
+
+       pp->found = 1;
+       return pp->found;
+
+error:
+       free(pp->probes[0]);
+       return n;
+}
+
+/*
+ * Open "<debugfs_path>/../kprobe_events" with the given open() @flags and
+ * @mode and return the file descriptor.  Dies when the path cannot be
+ * formatted or the file cannot be opened.
+ */
+static int open_kprobe_events(int flags, int mode)
+{
+       char path[PATH_MAX];
+       int fd;
+
+       if (e_snprintf(path, PATH_MAX, "%s/../kprobe_events", debugfs_path) < 0)
+               die("Failed to make kprobe_events path.");
+
+       fd = open(path, flags, mode);
+       if (fd >= 0)
+               return fd;
+
+       if (errno == ENOENT)
+               die("kprobe_events file does not exist -"
+                   " please rebuild with CONFIG_KPROBE_TRACER.");
+       else
+               die("Could not open kprobe_events file: %s",
+                   strerror(errno));
+
+       return fd;
+}
+
+/*
+ * Get raw string list of current kprobe_events.
+ *
+ * Reads @fd line by line through a duplicated descriptor (so @fd itself
+ * stays open for the caller), strips the trailing newline of each line and
+ * adds it to a new strlist, which is returned.  Dies when the list or the
+ * stream cannot be set up or a line cannot be added.
+ */
+static struct strlist *get_trace_kprobe_event_rawlist(int fd)
+{
+       int ret;
+       size_t len;
+       FILE *fp;
+       char buf[MAX_CMDLEN];
+       struct strlist *sl;
+
+       sl = strlist__new(true, NULL);
+       if (sl == NULL)
+               die("Failed to allocate string list.");
+
+       /* A failing dup() yields -1, which makes fdopen() fail as well */
+       fp = fdopen(dup(fd), "r");
+       if (fp == NULL)
+               die("Failed to read kprobe_events: %s", strerror(errno));
+
+       while (fgets(buf, MAX_CMDLEN, fp) != NULL) {
+               /* Guard the index: never look at buf[-1] on an empty string */
+               len = strlen(buf);
+               if (len > 0 && buf[len - 1] == '\n')
+                       buf[len - 1] = '\0';
+               ret = strlist__add(sl, buf);
+               if (ret < 0)
+                       die("strlist__add failed: %s", strerror(-ret));
+       }
+       fclose(fp);
+
+       return sl;
+}
+
+/*
+ * Free and zero clear probe_point.
+ *
+ * Releases the function and file names, every argument string, the argument
+ * array and the first pp->found synthesized probe strings, then zeroes the
+ * whole structure so it can be reused.
+ */
+static void clear_probe_point(struct probe_point *pp)
+{
+       int i;
+
+       /* free(NULL) is a no-op, so the pointers need no guard */
+       free(pp->function);
+       free(pp->file);
+       for (i = 0; i < pp->nr_args; i++)
+               free(pp->args[i]);
+       free(pp->args);
+       for (i = 0; i < pp->found; i++)
+               free(pp->probes[i]);
+       /* sizeof(*pp): clear the structure, not just pointer-size bytes */
+       memset(pp, 0, sizeof(*pp));
+}
+
+/*
+ * List up current perf-probe events.
+ *
+ * Reads every line of kprobe_events, converts it back to perf-probe syntax
+ * and prints it as "[group:event]\tdefinition".
+ */
+void show_perf_probe_events(void)
+{
+       unsigned int i;
+       int fd;
+       char *group, *event;
+       struct probe_point pp;
+       struct strlist *rawlist;
+       struct str_node *ent;
+
+       fd = open_kprobe_events(O_RDONLY, 0);
+       rawlist = get_trace_kprobe_event_rawlist(fd);
+       close(fd);
+
+       for (i = 0; i < strlist__nr_entries(rawlist); i++) {
+               /*
+                * Start every entry from a known state: the parser does not
+                * assign pp.found or pp.probes[], and clear_probe_point()
+                * reads both.
+                */
+               memset(&pp, 0, sizeof(pp));
+               ent = strlist__entry(rawlist, i);
+               parse_trace_kprobe_event(ent->s, &group, &event, &pp);
+               /* On failure pp.probes[0] has been freed; do not print it */
+               if (synthesize_perf_probe_event(&pp) > 0)
+                       printf("[%s:%s]\t%s\n", group, event, pp.probes[0]);
+               else
+                       pr_debug("Failed to synthesize probe: %s:%s\n",
+                                group, event);
+               free(group);
+               free(event);
+               clear_probe_point(&pp);
+       }
+
+       strlist__delete(rawlist);
+}
+
+/*
+ * Get current perf-probe event names.
+ *
+ * Parses each raw kprobe_events line read from @fd and collects the event
+ * names in a new strlist created with strlist__new(false, NULL).  The group
+ * string is freed here; the event string is handed to the list and not
+ * freed by this function.
+ */
+static struct strlist *get_perf_event_names(int fd)
+{
+       struct strlist *raw = get_trace_kprobe_event_rawlist(fd);
+       struct strlist *names = strlist__new(false, NULL);
+       unsigned int idx = 0;
+
+       while (idx < strlist__nr_entries(raw)) {
+               struct str_node *node = strlist__entry(raw, idx);
+               char *grp, *evt;
+
+               parse_trace_kprobe_event(node->s, &grp, &evt, NULL);
+               strlist__add(names, evt);
+               free(grp);
+               idx++;
+       }
+
+       strlist__delete(raw);
+
+       return names;
+}
+
+/*
+ * Write the NUL-terminated command @buf to @fd.  Dies when write() returns
+ * zero or less; otherwise reports the added event on stdout and returns the
+ * write() result.
+ */
+static int write_trace_kprobe_event(int fd, const char *buf)
+{
+       int written = write(fd, buf, strlen(buf));
+
+       if (written > 0)
+               printf("Added new event: %s\n", buf);
+       else
+               die("Failed to create event.");
+
+       return written;
+}
+
+/*
+ * Store in @buf (of @len bytes) the first name "<base>_<N>", N counting up
+ * from 0, that strlist__has_entry() does not find in @namelist.  Dies when
+ * formatting fails or all MAX_EVENT_INDEX candidates are taken.
+ */
+static void get_new_event_name(char *buf, size_t len, const char *base,
+                              struct strlist *namelist)
+{
+       int idx = 0;
+       int err;
+
+       while (idx < MAX_EVENT_INDEX) {
+               err = e_snprintf(buf, len, "%s_%d", base, idx);
+               if (err < 0)
+                       die("snprintf() failed: %s", strerror(-err));
+               if (!strlist__has_entry(namelist, buf))
+                       return;
+               idx++;
+       }
+
+       die("Too many events are on the same function.");
+}
+
+/*
+ * Register every synthesized probe of @probes[0..nr_probes) with the kernel.
+ *
+ * For each of the pp->found definitions in pp->probes[] an unused event name
+ * "<function>_<N>" is chosen and the line
+ * "p|r:PERFPROBE_GROUP/<event> <definition>" is appended to kprobe_events.
+ * Dies on any failure.
+ */
+void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
+{
+       int i, j, fd, ret;
+       struct probe_point *pp;
+       char buf[MAX_CMDLEN];
+       char event[64];
+       char *name;
+       struct strlist *namelist;
+
+       /* O_APPEND is an open() flag; the mode only matters with O_CREAT */
+       fd = open_kprobe_events(O_RDWR | O_APPEND, 0);
+       /* Get current event names */
+       namelist = get_perf_event_names(fd);
+
+       for (j = 0; j < nr_probes; j++) {
+               pp = probes + j;
+               for (i = 0; i < pp->found; i++) {
+                       /* Get an unused new event name */
+                       get_new_event_name(event, sizeof(event), pp->function,
+                                          namelist);
+                       /* A truncated command must not reach the kernel */
+                       ret = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s\n",
+                                        pp->retprobe ? 'r' : 'p',
+                                        PERFPROBE_GROUP, event,
+                                        pp->probes[i]);
+                       if (ret < 0)
+                               die("Probe definition is too long.");
+                       write_trace_kprobe_event(fd, buf);
+                       /*
+                        * Add added event name to namelist.  The list comes
+                        * from strlist__new(false, NULL) and appears to keep
+                        * the pointer it is given (get_perf_event_names()
+                        * hands it heap strings and never frees them), so it
+                        * gets a private copy: 'event' is overwritten on the
+                        * next pass.
+                        */
+                       name = strdup(event);
+                       if (!name)
+                               die("Failed to copy event name.");
+                       strlist__add(namelist, name);
+               }
+       }
+       strlist__delete(namelist);
+       close(fd);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
new file mode 100644 (file)
index 0000000..0c6fe56
--- /dev/null
@@ -0,0 +1,18 @@
+#ifndef _PROBE_EVENT_H
+#define _PROBE_EVENT_H
+
+#include "probe-finder.h"
+#include "strlist.h"
+
+extern int parse_perf_probe_event(const char *str, struct probe_point *pp);
+extern int synthesize_perf_probe_event(struct probe_point *pp);
+extern void parse_trace_kprobe_event(const char *str, char **group,
+                                    char **event, struct probe_point *pp);
+extern int synthesize_trace_kprobe_event(struct probe_point *pp);
+extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes);
+extern void show_perf_probe_events(void);
+
+/* Maximum index number of event-name postfix */
+#define MAX_EVENT_INDEX        1024
+
+#endif /*_PROBE_EVENT_H */
index 227043577e06b4800a094d8e8e7f414d46d950e4..f24a8cc933d5f1aeede2875f774efc00b8610328 100644 (file)
@@ -1,5 +1,3 @@
-#include <string.h>
-#include <stdlib.h>
 #include "string.h"
 #include "util.h"
 
@@ -127,3 +125,104 @@ out_err:
 out:
        return length;
 }
+
+/*
+ * Helper function for splitting a string into an argv-like array.
+ * Originally copied from lib/argv_split.c
+ */
+/* Return a pointer to the first character of @cp that is not whitespace. */
+static const char *skip_sep(const char *cp)
+{
+       for (; *cp != '\0'; cp++) {
+               if (!isspace(*cp))
+                       break;
+       }
+
+       return cp;
+}
+
+/* Return a pointer to the first whitespace character (or the NUL) of @cp. */
+static const char *skip_arg(const char *cp)
+{
+       for (; *cp != '\0'; cp++) {
+               if (isspace(*cp))
+                       break;
+       }
+
+       return cp;
+}
+
+/* Count the whitespace-separated words in @str. */
+static int count_argc(const char *str)
+{
+       int nr = 0;
+
+       /* Each pass starts on the first character of a word */
+       for (str = skip_sep(str); *str != '\0'; str = skip_sep(skip_arg(str)))
+               nr++;
+
+       return nr;
+}
+
+/**
+ * argv_free - free an argv
+ * @argv - the NULL-terminated argument vector to be freed
+ *
+ * Releases every string up to the terminating NULL entry and then the
+ * vector itself.
+ */
+void argv_free(char **argv)
+{
+       char **cur = argv;
+
+       while (*cur != NULL)
+               free(*cur++);
+
+       free(argv);
+}
+
+/**
+ * argv_split - split a string at whitespace, returning an argv
+ * @str: the string to be split
+ * @argcp: returned argument count (may be NULL)
+ *
+ * Splits @str strictly on white-space, without any quote processing; a run
+ * of whitespace characters counts as one separator.  Each word is copied
+ * into its own allocation and the returned array always ends with a NULL
+ * entry.  Returns NULL on memory allocation failure.
+ */
+char **argv_split(const char *str, int *argcp)
+{
+       int nr = count_argc(str);
+       char **vec = zalloc(sizeof(*vec) * (nr + 1));
+       int filled = 0;
+
+       if (vec == NULL)
+               return NULL;
+
+       if (argcp != NULL)
+               *argcp = nr;
+
+       /* Each pass starts on the first character of a word */
+       for (str = skip_sep(str); *str != '\0'; str = skip_sep(str)) {
+               const char *begin = str;
+
+               str = skip_arg(str);
+               vec[filled] = strndup(begin, str - begin);
+               if (vec[filled] == NULL) {
+                       /* The slot is NULL, so the vector is terminated */
+                       argv_free(vec);
+                       return NULL;
+               }
+               filled++;
+       }
+       vec[filled] = NULL;
+
+       return vec;
+}
index e50b07f80827b7d7d0cf3a35cb4fef444e033003..bfecec265a1a423e3e2816b725f0540f234f5be4 100644 (file)
@@ -6,6 +6,8 @@
 int hex2u64(const char *ptr, u64 *val);
 char *strxfrchar(char *s, char from, char to);
 s64 perf_atoll(const char *str);
+char **argv_split(const char *str, int *argcp);
+void argv_free(char **argv);
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
index 1b77e81b38de2fe7008fc09cd51aa753f4959ff9..fffcb937cdcb207f470f1bf335f1e316e283bc12 100644 (file)
@@ -6,11 +6,17 @@
 
 #include "debug.h"
 
+#include <asm/bug.h>
 #include <libelf.h>
 #include <gelf.h>
 #include <elf.h>
+#include <limits.h>
 #include <sys/utsname.h>
 
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
 enum dso_origin {
        DSO__ORIG_KERNEL = 0,
        DSO__ORIG_JAVA_JIT,
@@ -22,17 +28,37 @@ enum dso_origin {
        DSO__ORIG_NOT_FOUND,
 };
 
-static void dsos__add(struct dso *dso);
-static struct dso *dsos__find(const char *name);
-static struct map *map__new2(u64 start, struct dso *dso);
-static void kernel_maps__insert(struct map *map);
+static void dsos__add(struct list_head *head, struct dso *dso);
+static struct map *thread__find_map_by_name(struct thread *self, char *name);
+static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
+struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
+static int dso__load_kernel_sym(struct dso *self, struct map *map,
+                               struct thread *thread, symbol_filter_t filter);
 unsigned int symbol__priv_size;
+static int vmlinux_path__nr_entries;
+static char **vmlinux_path;
+
+static struct symbol_conf symbol_conf__defaults = {
+       .use_modules      = true,
+       .try_vmlinux_path = true,
+};
+
+static struct thread kthread_mem;
+struct thread *kthread = &kthread_mem;
+
+/* Report whether the bit for map type @type is set in self->loaded. */
+bool dso__loaded(const struct dso *self, enum map_type type)
+{
+       const int type_bit = 1 << type;
+
+       return self->loaded & type_bit;
+}
 
-static struct rb_root kernel_maps;
+/* Set the bit for map type @type in self->loaded. */
+static void dso__set_loaded(struct dso *self, enum map_type type)
+{
+       const int type_bit = 1 << type;
+
+       self->loaded |= type_bit;
+}
 
-static void dso__fixup_sym_end(struct dso *self)
+static void symbols__fixup_end(struct rb_root *self)
 {
-       struct rb_node *nd, *prevnd = rb_first(&self->syms);
+       struct rb_node *nd, *prevnd = rb_first(self);
        struct symbol *curr, *prev;
 
        if (prevnd == NULL)
@@ -53,10 +79,10 @@ static void dso__fixup_sym_end(struct dso *self)
                curr->end = roundup(curr->start, 4096);
 }
 
-static void kernel_maps__fixup_end(void)
+static void __thread__fixup_maps_end(struct thread *self, enum map_type type)
 {
        struct map *prev, *curr;
-       struct rb_node *nd, *prevnd = rb_first(&kernel_maps);
+       struct rb_node *nd, *prevnd = rb_first(&self->maps[type]);
 
        if (prevnd == NULL)
                return;
@@ -69,25 +95,31 @@ static void kernel_maps__fixup_end(void)
                prev->end = curr->start - 1;
        }
 
-       nd = rb_last(&curr->dso->syms);
-       if (nd) {
-               struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
-               curr->end = sym->end;
-       }
+       /*
+        * We still haven't the actual symbols, so guess the
+        * last map final address.
+        */
+       curr->end = ~0UL;
+}
+
+/* Run __thread__fixup_maps_end() once for each of the MAP__NR_TYPES types. */
+static void thread__fixup_maps_end(struct thread *self)
+{
+       int type = 0;
+
+       while (type < MAP__NR_TYPES) {
+               __thread__fixup_maps_end(self, type);
+               ++type;
+       }
+}
 
 static struct symbol *symbol__new(u64 start, u64 len, const char *name)
 {
        size_t namelen = strlen(name) + 1;
-       struct symbol *self = calloc(1, (symbol__priv_size +
-                                        sizeof(*self) + namelen));
-       if (!self)
+       struct symbol *self = zalloc(symbol__priv_size +
+                                    sizeof(*self) + namelen);
+       if (self == NULL)
                return NULL;
 
-       if (symbol__priv_size) {
-               memset(self, 0, symbol__priv_size);
+       if (symbol__priv_size)
                self = ((void *)self) + symbol__priv_size;
-       }
+
        self->start = start;
        self->end   = len ? start + len - 1 : start;
 
@@ -109,15 +141,30 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp)
                       self->start, self->end, self->name);
 }
 
+/*
+ * Point self->long_name at @name (the pointer is stored, not copied) and
+ * cache its length.  A NULL @name leaves the dso unchanged.
+ */
+static void dso__set_long_name(struct dso *self, char *name)
+{
+       if (name != NULL) {
+               self->long_name_len = strlen(name);
+               self->long_name = name;
+       }
+}
+
+/* Set self->short_name to what basename() returns for self->long_name. */
+static void dso__set_basename(struct dso *self)
+{
+       self->short_name = basename(self->long_name);
+}
+
 struct dso *dso__new(const char *name)
 {
        struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
 
        if (self != NULL) {
+               int i;
                strcpy(self->name, name);
-               self->long_name = self->name;
+               dso__set_long_name(self, self->name);
                self->short_name = self->name;
-               self->syms = RB_ROOT;
+               for (i = 0; i < MAP__NR_TYPES; ++i)
+                       self->symbols[i] = RB_ROOT;
                self->find_symbol = dso__find_symbol;
                self->slen_calculated = 0;
                self->origin = DSO__ORIG_NOT_FOUND;
@@ -128,22 +175,24 @@ struct dso *dso__new(const char *name)
        return self;
 }
 
-static void dso__delete_symbols(struct dso *self)
+static void symbols__delete(struct rb_root *self)
 {
        struct symbol *pos;
-       struct rb_node *next = rb_first(&self->syms);
+       struct rb_node *next = rb_first(self);
 
        while (next) {
                pos = rb_entry(next, struct symbol, rb_node);
                next = rb_next(&pos->rb_node);
-               rb_erase(&pos->rb_node, &self->syms);
+               rb_erase(&pos->rb_node, self);
                symbol__delete(pos);
        }
 }
 
 void dso__delete(struct dso *self)
 {
-       dso__delete_symbols(self);
+       int i;
+       for (i = 0; i < MAP__NR_TYPES; ++i)
+               symbols__delete(&self->symbols[i]);
        if (self->long_name != self->name)
                free(self->long_name);
        free(self);
@@ -155,9 +204,9 @@ void dso__set_build_id(struct dso *self, void *build_id)
        self->has_build_id = 1;
 }
 
-static void dso__insert_symbol(struct dso *self, struct symbol *sym)
+static void symbols__insert(struct rb_root *self, struct symbol *sym)
 {
-       struct rb_node **p = &self->syms.rb_node;
+       struct rb_node **p = &self->rb_node;
        struct rb_node *parent = NULL;
        const u64 ip = sym->start;
        struct symbol *s;
@@ -171,17 +220,17 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym)
                        p = &(*p)->rb_right;
        }
        rb_link_node(&sym->rb_node, parent, p);
-       rb_insert_color(&sym->rb_node, &self->syms);
+       rb_insert_color(&sym->rb_node, self);
 }
 
-struct symbol *dso__find_symbol(struct dso *self, u64 ip)
+static struct symbol *symbols__find(struct rb_root *self, u64 ip)
 {
        struct rb_node *n;
 
        if (self == NULL)
                return NULL;
 
-       n = self->syms.rb_node;
+       n = self->rb_node;
 
        while (n) {
                struct symbol *s = rb_entry(n, struct symbol, rb_node);
@@ -197,6 +246,11 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip)
        return NULL;
 }
 
+/* Look up @addr in the symbol tree that @self keeps for map type @type. */
+struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr)
+{
+       struct rb_root *root = &self->symbols[type];
+
+       return symbols__find(root, addr);
+}
+
 int build_id__sprintf(u8 *self, int len, char *bf)
 {
        char *bid = bf;
@@ -220,15 +274,14 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp)
        return fprintf(fp, "%s", sbuild_id);
 }
 
-size_t dso__fprintf(struct dso *self, FILE *fp)
+size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
 {
        struct rb_node *nd;
        size_t ret = fprintf(fp, "dso: %s (", self->short_name);
 
        ret += dso__fprintf_buildid(self, fp);
        ret += fprintf(fp, ")\n");
-
-       for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
+       for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) {
                struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
                ret += symbol__fprintf(pos, fp);
        }
@@ -241,10 +294,11 @@ size_t dso__fprintf(struct dso *self, FILE *fp)
  * so that we can in the next step set the symbol ->end address and then
  * call kernel_maps__split_kallsyms.
  */
-static int kernel_maps__load_all_kallsyms(void)
+static int dso__load_all_kallsyms(struct dso *self, struct map *map)
 {
        char *line = NULL;
        size_t n;
+       struct rb_root *root = &self->symbols[map->type];
        FILE *file = fopen("/proc/kallsyms", "r");
 
        if (file == NULL)
@@ -287,13 +341,11 @@ static int kernel_maps__load_all_kallsyms(void)
 
                if (sym == NULL)
                        goto out_delete_line;
-
                /*
                 * We will pass the symbols to the filter later, in
-                * kernel_maps__split_kallsyms, when we have split the
-                * maps per module
+                * map__split_kallsyms, when we have split the maps per module
                 */
-               dso__insert_symbol(kernel_map->dso, sym);
+               symbols__insert(root, sym);
        }
 
        free(line);
@@ -312,12 +364,14 @@ out_failure:
  * kernel range is broken in several maps, named [kernel].N, as we don't have
  * the original ELF section names vmlinux have.
  */
-static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules)
+static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread *thread,
+                              symbol_filter_t filter)
 {
-       struct map *map = kernel_map;
+       struct map *curr_map = map;
        struct symbol *pos;
        int count = 0;
-       struct rb_node *next = rb_first(&kernel_map->dso->syms);
+       struct rb_root *root = &self->symbols[map->type];
+       struct rb_node *next = rb_first(root);
        int kernel_range = 0;
 
        while (next) {
@@ -328,16 +382,16 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules)
 
                module = strchr(pos->name, '\t');
                if (module) {
-                       if (!use_modules)
-                               goto delete_symbol;
+                       if (!thread->use_modules)
+                               goto discard_symbol;
 
                        *module++ = '\0';
 
-                       if (strcmp(map->dso->name, module)) {
-                               map = kernel_maps__find_by_dso_name(module);
-                               if (!map) {
-                                       pr_err("/proc/{kallsyms,modules} "
-                                              "inconsistency!\n");
+                       if (strcmp(self->name, module)) {
+                               curr_map = thread__find_map_by_name(thread, module);
+                               if (curr_map == NULL) {
+                                       pr_debug("/proc/{kallsyms,modules} "
+                                                "inconsistency!\n");
                                        return -1;
                                }
                        }
@@ -345,9 +399,9 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules)
                         * So that we look just like we get from .ko files,
                         * i.e. not prelinked, relative to map->start.
                         */
-                       pos->start = map->map_ip(map, pos->start);
-                       pos->end   = map->map_ip(map, pos->end);
-               } else if (map != kernel_map) {
+                       pos->start = curr_map->map_ip(curr_map, pos->start);
+                       pos->end   = curr_map->map_ip(curr_map, pos->end);
+               } else if (curr_map != map) {
                        char dso_name[PATH_MAX];
                        struct dso *dso;
 
@@ -358,25 +412,24 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules)
                        if (dso == NULL)
                                return -1;
 
-                       map = map__new2(pos->start, dso);
+                       curr_map = map__new2(pos->start, dso, map->type);
                        if (map == NULL) {
                                dso__delete(dso);
                                return -1;
                        }
 
-                       map->map_ip = map->unmap_ip = identity__map_ip;
-                       kernel_maps__insert(map);
+                       curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+                       __thread__insert_map(thread, curr_map);
                        ++kernel_range;
                }
 
-               if (filter && filter(map, pos)) {
-delete_symbol:
-                       rb_erase(&pos->rb_node, &kernel_map->dso->syms);
+               if (filter && filter(curr_map, pos)) {
+discard_symbol:                rb_erase(&pos->rb_node, root);
                        symbol__delete(pos);
                } else {
-                       if (map != kernel_map) {
-                               rb_erase(&pos->rb_node, &kernel_map->dso->syms);
-                               dso__insert_symbol(map->dso, pos);
+                       if (curr_map != map) {
+                               rb_erase(&pos->rb_node, root);
+                               symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
                        }
                        count++;
                }
@@ -386,32 +439,22 @@ delete_symbol:
 }
 
 
-static int kernel_maps__load_kallsyms(symbol_filter_t filter, int use_modules)
+static int dso__load_kallsyms(struct dso *self, struct map *map,
+                             struct thread *thread, symbol_filter_t filter)
 {
-       if (kernel_maps__load_all_kallsyms())
+       if (dso__load_all_kallsyms(self, map) < 0)
                return -1;
 
-       dso__fixup_sym_end(kernel_map->dso);
+       symbols__fixup_end(&self->symbols[map->type]);
+       self->origin = DSO__ORIG_KERNEL;
 
-       return kernel_maps__split_kallsyms(filter, use_modules);
+       return dso__split_kallsyms(self, map, thread, filter);
 }
 
-static size_t kernel_maps__fprintf(FILE *fp)
+size_t kernel_maps__fprintf(FILE *fp)
 {
        size_t printed = fprintf(fp, "Kernel maps:\n");
-       struct rb_node *nd;
-
-       for (nd = rb_first(&kernel_maps); nd; nd = rb_next(nd)) {
-               struct map *pos = rb_entry(nd, struct map, rb_node);
-
-               printed += fprintf(fp, "Map:");
-               printed += map__fprintf(pos, fp);
-               if (verbose > 1) {
-                       printed += dso__fprintf(pos->dso, fp);
-                       printed += fprintf(fp, "--\n");
-               }
-       }
-
+       printed += thread__fprintf_maps(kthread, fp);
        return printed + fprintf(fp, "END kernel maps\n");
 }
 
@@ -461,7 +504,7 @@ static int dso__load_perf_map(struct dso *self, struct map *map,
                if (filter && filter(map, sym))
                        symbol__delete(sym);
                else {
-                       dso__insert_symbol(self, sym);
+                       symbols__insert(&self->symbols[map->type], sym);
                        nr_syms++;
                }
        }
@@ -659,7 +702,7 @@ static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
                        if (filter && filter(map, f))
                                symbol__delete(f);
                        else {
-                               dso__insert_symbol(self, f);
+                               symbols__insert(&self->symbols[map->type], f);
                                ++nr;
                        }
                }
@@ -681,7 +724,7 @@ static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
                        if (filter && filter(map, f))
                                symbol__delete(f);
                        else {
-                               dso__insert_symbol(self, f);
+                               symbols__insert(&self->symbols[map->type], f);
                                ++nr;
                        }
                }
@@ -701,9 +744,9 @@ out:
        return 0;
 }
 
-static int dso__load_sym(struct dso *self, struct map *map, const char *name,
-                        int fd, symbol_filter_t filter, int kernel,
-                        int kmodule)
+static int dso__load_sym(struct dso *self, struct map *map,
+                        struct thread *thread, const char *name, int fd,
+                        symbol_filter_t filter, int kernel, int kmodule)
 {
        struct map *curr_map = map;
        struct dso *curr_dso = self;
@@ -806,7 +849,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
                        snprintf(dso_name, sizeof(dso_name),
                                 "%s%s", self->short_name, section_name);
 
-                       curr_map = kernel_maps__find_by_dso_name(dso_name);
+                       curr_map = thread__find_map_by_name(thread, dso_name);
                        if (curr_map == NULL) {
                                u64 start = sym.st_value;
 
@@ -816,7 +859,8 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
                                curr_dso = dso__new(dso_name);
                                if (curr_dso == NULL)
                                        goto out_elf_end;
-                               curr_map = map__new2(start, curr_dso);
+                               curr_map = map__new2(start, curr_dso,
+                                                    MAP__FUNCTION);
                                if (curr_map == NULL) {
                                        dso__delete(curr_dso);
                                        goto out_elf_end;
@@ -824,8 +868,8 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
                                curr_map->map_ip = identity__map_ip;
                                curr_map->unmap_ip = identity__map_ip;
                                curr_dso->origin = DSO__ORIG_KERNEL;
-                               kernel_maps__insert(curr_map);
-                               dsos__add(curr_dso);
+                               __thread__insert_map(kthread, curr_map);
+                               dsos__add(&dsos__kernel, curr_dso);
                        } else
                                curr_dso = curr_map->dso;
 
@@ -855,7 +899,7 @@ new_symbol:
                if (filter && filter(curr_map, f))
                        symbol__delete(f);
                else {
-                       dso__insert_symbol(curr_dso, f);
+                       symbols__insert(&curr_dso->symbols[curr_map->type], f);
                        nr++;
                }
        }
@@ -864,7 +908,7 @@ new_symbol:
         * For misannotated, zeroed, ASM function sizes.
         */
        if (nr > 0)
-               dso__fixup_sym_end(self);
+               symbols__fixup_end(&self->symbols[map->type]);
        err = nr;
 out_elf_end:
        elf_end(elf);
@@ -872,47 +916,46 @@ out_close:
        return err;
 }
 
-bool fetch_build_id_table(struct list_head *head)
+static bool dso__build_id_equal(const struct dso *self, u8 *build_id)
 {
-       bool have_buildid = false;
-       struct dso *pos;
-
-       list_for_each_entry(pos, &dsos, node) {
-               struct build_id_list *new;
-               struct build_id_event b;
-               size_t len;
-
-               if (filename__read_build_id(pos->long_name,
-                                           &b.build_id,
-                                           sizeof(b.build_id)) < 0)
-                       continue;
-               have_buildid = true;
-               memset(&b.header, 0, sizeof(b.header));
-               len = strlen(pos->long_name) + 1;
-               len = ALIGN(len, 64);
-               b.header.size = sizeof(b) + len;
+       return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0;
+}
 
-               new = malloc(sizeof(*new));
-               if (!new)
-                       die("No memory\n");
+static bool __dsos__read_build_ids(struct list_head *head)
+{
+       bool have_build_id = false;
+       struct dso *pos;
 
-               memcpy(&new->event, &b, sizeof(b));
-               new->dso_name = pos->long_name;
-               new->len = len;
+       list_for_each_entry(pos, head, node)
+               if (filename__read_build_id(pos->long_name, pos->build_id,
+                                           sizeof(pos->build_id)) > 0) {
+                       have_build_id     = true;
+                       pos->has_build_id = true;
+               }
 
-               list_add_tail(&new->list, head);
-       }
+       return have_build_id;
+}
 
-       return have_buildid;
+/*
+ * Read the build-id of every DSO on both the kernel and the user lists.
+ *
+ * Both lists are always walked: a plain "a() || b()" would short-circuit
+ * and leave the user DSOs without ->build_id/->has_build_id as soon as a
+ * single kernel DSO turned out to have a build-id.
+ *
+ * Returns true if at least one DSO on either list has a build-id.
+ */
+bool dsos__read_build_ids(void)
+{
+       bool kbuildids = __dsos__read_build_ids(&dsos__kernel),
+            ubuildids = __dsos__read_build_ids(&dsos__user);
+
+       return kbuildids || ubuildids;
 }
 
+/*
+ * Align offset to 4 bytes as needed for note name and descriptor data.
+ * The argument is rounded up to the next multiple of 4; it is evaluated
+ * exactly once.
+ */
+#define NOTE_ALIGN(n) (((n) + 3) & -4U)
+
 int filename__read_build_id(const char *filename, void *bf, size_t size)
 {
        int fd, err = -1;
        GElf_Ehdr ehdr;
        GElf_Shdr shdr;
-       Elf_Data *build_id_data;
+       Elf_Data *data;
        Elf_Scn *sec;
+       Elf_Kind ek;
+       void *ptr;
        Elf *elf;
 
        if (size < BUILD_ID_SIZE)
@@ -928,6 +971,10 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
                goto out_close;
        }
 
+       ek = elf_kind(elf);
+       if (ek != ELF_K_ELF)
+               goto out_elf_end;
+
        if (gelf_getehdr(elf, &ehdr) == NULL) {
                pr_err("%s: cannot get elf header.\n", __func__);
                goto out_elf_end;
@@ -935,14 +982,37 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
 
        sec = elf_section_by_name(elf, &ehdr, &shdr,
                                  ".note.gnu.build-id", NULL);
-       if (sec == NULL)
-               goto out_elf_end;
+       if (sec == NULL) {
+               sec = elf_section_by_name(elf, &ehdr, &shdr,
+                                         ".notes", NULL);
+               if (sec == NULL)
+                       goto out_elf_end;
+       }
 
-       build_id_data = elf_getdata(sec, NULL);
-       if (build_id_data == NULL)
+       data = elf_getdata(sec, NULL);
+       if (data == NULL)
                goto out_elf_end;
-       memcpy(bf, build_id_data->d_buf + 16, BUILD_ID_SIZE);
-       err = BUILD_ID_SIZE;
+
+       ptr = data->d_buf;
+       while (ptr < (data->d_buf + data->d_size)) {
+               GElf_Nhdr *nhdr = ptr;
+               int namesz = NOTE_ALIGN(nhdr->n_namesz),
+                   descsz = NOTE_ALIGN(nhdr->n_descsz);
+               const char *name;
+
+               ptr += sizeof(*nhdr);
+               name = ptr;
+               ptr += namesz;
+               if (nhdr->n_type == NT_GNU_BUILD_ID &&
+                   nhdr->n_namesz == sizeof("GNU")) {
+                       if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+                               memcpy(bf, ptr, BUILD_ID_SIZE);
+                               err = BUILD_ID_SIZE;
+                               break;
+                       }
+               }
+               ptr += descsz;
+       }
 out_elf_end:
        elf_end(elf);
 out_close:
@@ -951,23 +1021,48 @@ out:
        return err;
 }
 
-static char *dso__read_build_id(struct dso *self)
+/*
+ * Read the GNU build-id out of a file made of raw ELF notes, such as
+ * /sys/module/<name>/notes/.note.gnu.build-id.
+ *
+ * @filename: file holding a sequence of ELF notes
+ * @build_id: destination, receives BUILD_ID_SIZE bytes on success
+ * @size:     size of @build_id, must be at least BUILD_ID_SIZE
+ *
+ * Returns 0 when a "GNU" NT_GNU_BUILD_ID note was found and copied,
+ * -1 otherwise.
+ */
+int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
 {
-       int len;
-       char *build_id = NULL;
-       unsigned char rawbf[BUILD_ID_SIZE];
+       int fd, err = -1;
 
-       len = filename__read_build_id(self->long_name, rawbf, sizeof(rawbf));
-       if (len < 0)
+       if (size < BUILD_ID_SIZE)
                goto out;
 
-       build_id = malloc(len * 2 + 1);
-       if (build_id == NULL)
+       fd = open(filename, O_RDONLY);
+       if (fd < 0)
                goto out;
 
-       build_id__sprintf(rawbf, len, build_id);
+       while (1) {
+               char bf[BUFSIZ];
+               GElf_Nhdr nhdr;
+               int namesz, descsz;
+
+               if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr))
+                       break;
+
+               namesz = NOTE_ALIGN(nhdr.n_namesz);
+               descsz = NOTE_ALIGN(nhdr.n_descsz);
+               /*
+                * Both sizes come straight from the file while bf is a
+                * fixed size buffer: stop parsing rather than read past
+                * its end (this also catches sizes that went negative
+                * when converted to int).
+                */
+               if (namesz < 0 || descsz < 0 ||
+                   (size_t)namesz + (size_t)descsz > sizeof(bf))
+                       break;
+
+               if (nhdr.n_type == NT_GNU_BUILD_ID &&
+                   nhdr.n_namesz == sizeof("GNU")) {
+                       if (read(fd, bf, namesz) != namesz)
+                               break;
+                       if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
+                               if (read(fd, build_id,
+                                   BUILD_ID_SIZE) == BUILD_ID_SIZE) {
+                                       err = 0;
+                                       break;
+                               }
+                       } else if (read(fd, bf, descsz) != descsz)
+                               break;
+               } else {
+                       /* Not a build-id note: skip name and descriptor */
+                       int n = namesz + descsz;
+                       if (read(fd, bf, n) != n)
+                               break;
+               }
+       }
+       close(fd);
 out:
-       return build_id;
+       return err;
 }
 
 char dso__symtab_origin(const struct dso *self)
@@ -990,12 +1085,17 @@ char dso__symtab_origin(const struct dso *self)
 int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
 {
        int size = PATH_MAX;
-       char *name = malloc(size), *build_id = NULL;
+       char *name;
+       u8 build_id[BUILD_ID_SIZE];
        int ret = -1;
        int fd;
 
-       self->loaded = 1;
+       dso__set_loaded(self, map->type);
+
+       if (self->kernel)
+               return dso__load_kernel_sym(self, map, kthread, filter);
 
+       name = malloc(size);
        if (!name)
                return -1;
 
@@ -1012,8 +1112,6 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
 
 more:
        do {
-               int berr = 0;
-
                self->origin++;
                switch (self->origin) {
                case DSO__ORIG_FEDORA:
@@ -1025,12 +1123,18 @@ more:
                                 self->long_name);
                        break;
                case DSO__ORIG_BUILDID:
-                       build_id = dso__read_build_id(self);
-                       if (build_id != NULL) {
+                       if (filename__read_build_id(self->long_name, build_id,
+                                                   sizeof(build_id))) {
+                               char build_id_hex[BUILD_ID_SIZE * 2 + 1];
+
+                               build_id__sprintf(build_id, sizeof(build_id),
+                                                 build_id_hex);
                                snprintf(name, size,
                                         "/usr/lib/debug/.build-id/%.2s/%s.debug",
-                                       build_id, build_id + 2);
-                               goto compare_build_id;
+                                       build_id_hex, build_id_hex + 2);
+                               if (self->has_build_id)
+                                       goto compare_build_id;
+                               break;
                        }
                        self->origin++;
                        /* Fall thru */
@@ -1043,25 +1147,18 @@ more:
                }
 
                if (self->has_build_id) {
-                       bool match;
-                       build_id = malloc(BUILD_ID_SIZE);
-                       if (build_id == NULL)
+                       if (filename__read_build_id(name, build_id,
+                                                   sizeof(build_id)) < 0)
                                goto more;
-                       berr = filename__read_build_id(name, build_id,
-                                                      BUILD_ID_SIZE);
 compare_build_id:
-                       match = berr > 0 && memcmp(build_id, self->build_id,
-                                                  sizeof(self->build_id)) == 0;
-                       free(build_id);
-                       build_id = NULL;
-                       if (!match)
+                       if (!dso__build_id_equal(self, build_id))
                                goto more;
                }
 
                fd = open(name, O_RDONLY);
        } while (fd < 0);
 
-       ret = dso__load_sym(self, map, name, fd, filter, 0, 0);
+       ret = dso__load_sym(self, map, NULL, name, fd, filter, 0, 0);
        close(fd);
 
        /*
@@ -1082,33 +1179,11 @@ out:
        return ret;
 }
 
-struct map *kernel_map;
-
-static void kernel_maps__insert(struct map *map)
-{
-       maps__insert(&kernel_maps, map);
-}
-
-struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp)
-{
-       struct map *map = maps__find(&kernel_maps, ip);
-
-       if (mapp)
-               *mapp = map;
-
-       if (map) {
-               ip = map->map_ip(map, ip);
-               return map->dso->find_symbol(map->dso, ip);
-       }
-
-       return NULL;
-}
-
-struct map *kernel_maps__find_by_dso_name(const char *name)
+static struct map *thread__find_map_by_name(struct thread *self, char *name)
 {
        struct rb_node *nd;
 
-       for (nd = rb_first(&kernel_maps); nd; nd = rb_next(nd)) {
+       for (nd = rb_first(&self->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
                struct map *map = rb_entry(nd, struct map, rb_node);
 
                if (map->dso && strcmp(map->dso->name, name) == 0)
@@ -1118,32 +1193,13 @@ struct map *kernel_maps__find_by_dso_name(const char *name)
        return NULL;
 }
 
-static int dso__load_module_sym(struct dso *self, struct map *map,
-                               symbol_filter_t filter)
-{
-       int err = 0, fd = open(self->long_name, O_RDONLY);
-
-       self->loaded = 1;
-
-       if (fd < 0) {
-               pr_err("%s: cannot open %s\n", __func__, self->long_name);
-               return err;
-       }
-
-       err = dso__load_sym(self, map, self->long_name, fd, filter, 0, 1);
-       close(fd);
-
-       return err;
-}
-
-static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter)
+static int dsos__set_modules_path_dir(char *dirname)
 {
        struct dirent *dent;
-       int nr_symbols = 0, err;
        DIR *dir = opendir(dirname);
 
        if (!dir) {
-               pr_err("%s: cannot open %s dir\n", __func__, dirname);
+               pr_debug("%s: cannot open %s dir\n", __func__, dirname);
                return -1;
        }
 
@@ -1157,14 +1213,13 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter)
 
                        snprintf(path, sizeof(path), "%s/%s",
                                 dirname, dent->d_name);
-                       err = dsos__load_modules_sym_dir(path, filter);
-                       if (err < 0)
+                       if (dsos__set_modules_path_dir(path) < 0)
                                goto failure;
                } else {
                        char *dot = strrchr(dent->d_name, '.'),
                             dso_name[PATH_MAX];
                        struct map *map;
-                       struct rb_node *last;
+                       char *long_name;
 
                        if (dot == NULL || strcmp(dot, ".ko"))
                                continue;
@@ -1172,45 +1227,27 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter)
                                 (int)(dot - dent->d_name), dent->d_name);
 
                        strxfrchar(dso_name, '-', '_');
-                       map = kernel_maps__find_by_dso_name(dso_name);
+                       map = thread__find_map_by_name(kthread, dso_name);
                        if (map == NULL)
                                continue;
 
                        snprintf(path, sizeof(path), "%s/%s",
                                 dirname, dent->d_name);
 
-                       map->dso->long_name = strdup(path);
-                       if (map->dso->long_name == NULL)
-                               goto failure;
-
-                       err = dso__load_module_sym(map->dso, map, filter);
-                       if (err < 0)
+                       long_name = strdup(path);
+                       if (long_name == NULL)
                                goto failure;
-                       last = rb_last(&map->dso->syms);
-                       if (last) {
-                               struct symbol *sym;
-                               /*
-                                * We do this here as well, even having the
-                                * symbol size found in the symtab because
-                                * misannotated ASM symbols may have the size
-                                * set to zero.
-                                */
-                               dso__fixup_sym_end(map->dso);
-
-                               sym = rb_entry(last, struct symbol, rb_node);
-                               map->end = map->start + sym->end;
-                       }
+                       dso__set_long_name(map->dso, long_name);
                }
-               nr_symbols += err;
        }
 
-       return nr_symbols;
+       return 0;
 failure:
        closedir(dir);
        return -1;
 }
 
-static int dsos__load_modules_sym(symbol_filter_t filter)
+static int dsos__set_modules_path(void)
 {
        struct utsname uts;
        char modules_path[PATH_MAX];
@@ -1221,7 +1258,7 @@ static int dsos__load_modules_sym(symbol_filter_t filter)
        snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel",
                 uts.release);
 
-       return dsos__load_modules_sym_dir(modules_path, filter);
+       return dsos__set_modules_path_dir(modules_path);
 }
 
 /*
@@ -1229,7 +1266,7 @@ static int dsos__load_modules_sym(symbol_filter_t filter)
  * they are loaded) and for vmlinux, where only after we load all the
  * symbols we'll know where it starts and ends.
  */
-static struct map *map__new2(u64 start, struct dso *dso)
+static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 {
        struct map *self = malloc(sizeof(*self));
 
@@ -1237,13 +1274,13 @@ static struct map *map__new2(u64 start, struct dso *dso)
                /*
                 * ->end will be filled after we load all the symbols
                 */
-               map__init(self, start, 0, 0, dso);
+               map__init(self, type, start, 0, 0, dso);
        }
 
        return self;
 }
 
-static int dsos__load_modules(void)
+static int thread__create_module_maps(struct thread *self)
 {
        char *line = NULL;
        size_t n;
@@ -1287,21 +1324,27 @@ static int dsos__load_modules(void)
                if (dso == NULL)
                        goto out_delete_line;
 
-               map = map__new2(start, dso);
+               map = map__new2(start, dso, MAP__FUNCTION);
                if (map == NULL) {
                        dso__delete(dso);
                        goto out_delete_line;
                }
 
+               snprintf(name, sizeof(name),
+                        "/sys/module/%s/notes/.note.gnu.build-id", line);
+               if (sysfs__read_build_id(name, dso->build_id,
+                                        sizeof(dso->build_id)) == 0)
+                       dso->has_build_id = true;
+
                dso->origin = DSO__ORIG_KMODULE;
-               kernel_maps__insert(map);
-               dsos__add(dso);
+               __thread__insert_map(self, map);
+               dsos__add(&dsos__kernel, dso);
        }
 
        free(line);
        fclose(file);
 
-       return 0;
+       return dsos__set_modules_path();
 
 out_delete_line:
        free(line);
@@ -1309,106 +1352,106 @@ out_failure:
        return -1;
 }
 
-static int dso__load_vmlinux(struct dso *self, struct map *map,
+/*
+ * Load the symbols of @self from the ELF image at @vmlinux.
+ *
+ * When @self already has a build-id, @vmlinux is only used if it carries
+ * the very same build-id; a file without one, or with a different one, is
+ * ignored.
+ *
+ * Returns the value of dso__load_sym(), or -1 if the build-id check fails
+ * or @vmlinux cannot be opened.
+ */
+static int dso__load_vmlinux(struct dso *self, struct map *map, struct thread *thread,
                             const char *vmlinux, symbol_filter_t filter)
 {
-       int err, fd = open(vmlinux, O_RDONLY);
+       int err = -1, fd;
+
+       if (self->has_build_id) {
+               u8 build_id[BUILD_ID_SIZE];
 
-       self->loaded = 1;
+               if (filename__read_build_id(vmlinux, build_id,
+                                           sizeof(build_id)) < 0) {
+                       pr_debug("No build_id in %s, ignoring it\n", vmlinux);
+                       return -1;
+               }
+               if (!dso__build_id_equal(self, build_id)) {
+                       char expected_build_id[BUILD_ID_SIZE * 2 + 1],
+                            vmlinux_build_id[BUILD_ID_SIZE * 2 + 1];
+
+                       build_id__sprintf(self->build_id,
+                                         sizeof(self->build_id),
+                                         expected_build_id);
+                       build_id__sprintf(build_id, sizeof(build_id),
+                                         vmlinux_build_id);
+                       pr_debug("build_id in %s is %s while expected is %s, "
+                                "ignoring it\n", vmlinux, vmlinux_build_id,
+                                expected_build_id);
+                       return -1;
+               }
+       }
 
+       fd = open(vmlinux, O_RDONLY);
        if (fd < 0)
                return -1;
 
-       err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0);
-
+       dso__set_loaded(self, map->type);
+       /*
+        * Hand over the path that was actually opened: ->long_name may
+        * still name another file while the vmlinux_path is being probed.
+        */
+       err = dso__load_sym(self, map, thread, vmlinux, fd, filter, 1, 0);
        close(fd);
 
        return err;
 }
 
-int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter,
-                     int use_modules)
+/*
+ * Load the kernel symbols of @self into @map.
+ *
+ * The sources are tried in this order:
+ *   1. every entry of vmlinux_path, when that table was set up;
+ *   2. ->long_name taken as a vmlinux image, unless it starts with '[';
+ *   3. /proc/kallsyms.
+ *
+ * On success the start and end of @map are fixed up. The result of the
+ * loader that ran last is returned, a value > 0 meaning success.
+ */
+static int dso__load_kernel_sym(struct dso *self, struct map *map,
+                               struct thread *thread, symbol_filter_t filter)
 {
-       int err = -1;
-       struct dso *dso = dso__new(vmlinux);
-
-       if (dso == NULL)
-               return -1;
-
-       dso->short_name = "[kernel]";
-       kernel_map = map__new2(0, dso);
-       if (kernel_map == NULL)
-               goto out_delete_dso;
-
-       kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip;
-
-       if (use_modules && dsos__load_modules() < 0) {
-               pr_warning("Failed to load list of modules in use! "
-                          "Continuing...\n");
-               use_modules = 0;
-       }
-
-       if (vmlinux) {
-               err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter);
-               if (err > 0 && use_modules) {
-                       int syms = dsos__load_modules_sym(filter);
-
-                       if (syms < 0)
-                               pr_warning("Failed to read module symbols!"
-                                          " Continuing...\n");
-                       else
-                               err += syms;
+       int err;
+       bool is_kallsyms;
+
+       if (vmlinux_path != NULL) {
+               int i;
+               pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+                        vmlinux_path__nr_entries);
+               for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+                       err = dso__load_vmlinux(self, map, thread,
+                                               vmlinux_path[i], filter);
+                       if (err > 0) {
+                               char *long_name = strdup(vmlinux_path[i]);
+
+                               pr_debug("Using %s for symbols\n",
+                                        vmlinux_path[i]);
+                               /*
+                                * The symbols are loaded already, so when
+                                * out of memory just keep the current name.
+                                */
+                               if (long_name != NULL)
+                                       dso__set_long_name(self, long_name);
+                               goto out_fixup;
+                       }
                }
        }
 
-       if (err <= 0)
-               err = kernel_maps__load_kallsyms(filter, use_modules);
+       is_kallsyms = self->long_name[0] == '[';
+       if (is_kallsyms)
+               goto do_kallsyms;
+
+       err = dso__load_vmlinux(self, map, thread, self->long_name, filter);
+       if (err <= 0) {
+               pr_info("The file %s cannot be used, "
+                       "trying to use /proc/kallsyms...\n", self->long_name);
+do_kallsyms:
+               err = dso__load_kallsyms(self, map, thread, filter);
+               if (err > 0 && !is_kallsyms) {
+                       char *long_name = strdup("[kernel.kallsyms]");
+
+                       /* Same as above: never install a NULL name. */
+                       if (long_name != NULL)
+                               dso__set_long_name(self, long_name);
+               }
+       }
 
        if (err > 0) {
-               struct rb_node *node = rb_first(&dso->syms);
-               struct symbol *sym = rb_entry(node, struct symbol, rb_node);
-
-               kernel_map->start = sym->start;
-               node = rb_last(&dso->syms);
-               sym = rb_entry(node, struct symbol, rb_node);
-               kernel_map->end = sym->end;
-
-               dso->origin = DSO__ORIG_KERNEL;
-               kernel_maps__insert(kernel_map);
-               /*
-                * Now that we have all sorted out, just set the ->end of all
-                * maps:
-                */
-               kernel_maps__fixup_end();
-               dsos__add(dso);
-
-               if (verbose)
-                       kernel_maps__fprintf(stderr);
+out_fixup:
+               map__fixup_start(map);
+               map__fixup_end(map);
        }
 
        return err;
-
-out_delete_dso:
-       dso__delete(dso);
-       return -1;
 }
 
-LIST_HEAD(dsos);
-struct dso     *vdso;
-
-const char     *vmlinux_name = "vmlinux";
-int            modules;
+LIST_HEAD(dsos__user);
+LIST_HEAD(dsos__kernel);
+struct dso *vdso;
 
-static void dsos__add(struct dso *dso)
+static void dsos__add(struct list_head *head, struct dso *dso)
 {
-       list_add_tail(&dso->node, &dsos);
+       list_add_tail(&dso->node, head);
 }
 
-static struct dso *dsos__find(const char *name)
+static struct dso *dsos__find(struct list_head *head, const char *name)
 {
        struct dso *pos;
 
-       list_for_each_entry(pos, &dsos, node)
+       list_for_each_entry(pos, head, node)
                if (strcmp(pos->name, name) == 0)
                        return pos;
        return NULL;
@@ -1416,53 +1459,170 @@ static struct dso *dsos__find(const char *name)
 
 struct dso *dsos__findnew(const char *name)
 {
-       struct dso *dso = dsos__find(name);
+       struct dso *dso = dsos__find(&dsos__user, name);
 
        if (!dso) {
                dso = dso__new(name);
-               if (dso != NULL)
-                       dsos__add(dso);
+               if (dso != NULL) {
+                       dsos__add(&dsos__user, dso);
+                       dso__set_basename(dso);
+               }
        }
 
        return dso;
 }
 
-void dsos__fprintf(FILE *fp)
+static void __dsos__fprintf(struct list_head *head, FILE *fp)
 {
        struct dso *pos;
 
-       list_for_each_entry(pos, &dsos, node)
-               dso__fprintf(pos, fp);
+       list_for_each_entry(pos, head, node) {
+               int i;
+               for (i = 0; i < MAP__NR_TYPES; ++i)
+                       dso__fprintf(pos, i, fp);
+       }
 }
 
-size_t dsos__fprintf_buildid(FILE *fp)
+/*
+ * Print all known DSOs to @fp: everything on the kernel list first, then
+ * everything on the user list.
+ */
+void dsos__fprintf(FILE *fp)
+{
+       __dsos__fprintf(&dsos__kernel, fp);
+       __dsos__fprintf(&dsos__user, fp);
+}
+
+static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp)
 {
        struct dso *pos;
        size_t ret = 0;
 
-       list_for_each_entry(pos, &dsos, node) {
+       list_for_each_entry(pos, head, node) {
                ret += dso__fprintf_buildid(pos, fp);
                ret += fprintf(fp, " %s\n", pos->long_name);
        }
        return ret;
 }
 
-int load_kernel(symbol_filter_t filter)
+/*
+ * Print the build-id and long name of every DSO to @fp, kernel list first
+ * and user list second.  Returns the sum of what the two per-list helpers
+ * report as printed.
+ */
+size_t dsos__fprintf_buildid(FILE *fp)
+{
+       /*
+        * Keep the two calls in separate statements: the operands of '+'
+        * are evaluated in an unspecified order, which would leave the
+        * order of the output up to the compiler.
+        */
+       size_t printed = __dsos__fprintf_buildid(&dsos__kernel, fp);
+
+       printed += __dsos__fprintf_buildid(&dsos__user, fp);
+       return printed;
+}
+
+static int thread__create_kernel_map(struct thread *self, const char *vmlinux)
 {
-       if (dsos__load_kernel(vmlinux_name, filter, modules) <= 0)
+       struct map *kmap;
+       struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]");
+
+       if (kernel == NULL)
                return -1;
 
+       kmap = map__new2(0, kernel, MAP__FUNCTION);
+       if (kmap == NULL)
+               goto out_delete_kernel_dso;
+
+       kmap->map_ip       = kmap->unmap_ip = identity__map_ip;
+       kernel->short_name = "[kernel]";
+       kernel->kernel     = 1;
+
        vdso = dso__new("[vdso]");
-       if (!vdso)
+       if (vdso == NULL)
+               goto out_delete_kernel_map;
+       dso__set_loaded(vdso, MAP__FUNCTION);
+
+       if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id,
+                                sizeof(kernel->build_id)) == 0)
+               kernel->has_build_id = true;
+
+       __thread__insert_map(self, kmap);
+       dsos__add(&dsos__kernel, kernel);
+       dsos__add(&dsos__user, vdso);
+
+       return 0;
+
+out_delete_kernel_map:
+       map__delete(kmap);
+out_delete_kernel_dso:
+       dso__delete(kernel);
+       return -1;
+}
+
+/*
+ * Free every path stored in vmlinux_path and then the array itself,
+ * leaving the array pointer NULL and the entry counter at zero.
+ */
+static void vmlinux_path__exit(void)
+{
+       while (--vmlinux_path__nr_entries >= 0) {
+               free(vmlinux_path[vmlinux_path__nr_entries]);
+               vmlinux_path[vmlinux_path__nr_entries] = NULL;
+       }
+       /*
+        * The loop always stops with the counter at -1; put it back to
+        * zero so that a later vmlinux_path__init() does not start
+        * filling the array at index -1.
+        */
+       vmlinux_path__nr_entries = 0;
+
+       free(vmlinux_path);
+       vmlinux_path = NULL;
+}
+
+static int vmlinux_path__init(void)
+{
+       struct utsname uts;
+       char bf[PATH_MAX];
+
+       if (uname(&uts) < 0)
                return -1;
 
-       dsos__add(vdso);
+       vmlinux_path = malloc(sizeof(char *) * 5);
+       if (vmlinux_path == NULL)
+               return -1;
+
+       vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux");
+       if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+               goto out_fail;
+       ++vmlinux_path__nr_entries;
+       vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux");
+       if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+               goto out_fail;
+       ++vmlinux_path__nr_entries;
+       snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
+       vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+       if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+               goto out_fail;
+       ++vmlinux_path__nr_entries;
+       snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", uts.release);
+       vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+       if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+               goto out_fail;
+       ++vmlinux_path__nr_entries;
+       snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux",
+                uts.release);
+       vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+       if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+               goto out_fail;
+       ++vmlinux_path__nr_entries;
 
        return 0;
+
+out_fail:
+       vmlinux_path__exit();
+       return -1;
 }
 
-void symbol__init(unsigned int priv_size)
+int symbol__init(struct symbol_conf *conf)
 {
+       const struct symbol_conf *pconf = conf ?: &symbol_conf__defaults;
+
        elf_version(EV_CURRENT);
-       symbol__priv_size = priv_size;
+       symbol__priv_size = pconf->priv_size;
+       thread__init(kthread, 0);
+
+       if (pconf->try_vmlinux_path && vmlinux_path__init() < 0)
+               return -1;
+
+       if (thread__create_kernel_map(kthread, pconf->vmlinux_name) < 0) {
+               vmlinux_path__exit();
+               return -1;
+       }
+
+       kthread->use_modules = pconf->use_modules;
+       if (pconf->use_modules && thread__create_module_maps(kthread) < 0)
+               pr_debug("Failed to load list of modules in use, "
+                        "continuing...\n");
+       /*
+        * Now that we have all the maps created, just set the ->end of them:
+        */
+       thread__fixup_maps_end(kthread);
+       return 0;
 }
index 51c5a4a0813332326c11adbc6b1b052b1942641d..17003efa0b39ab072f6656109cddd434fe248d9a 100644 (file)
@@ -49,6 +49,13 @@ struct symbol {
        char            name[0];
 };
 
+/*
+ * Settings consumed by symbol__init(); passing NULL there selects
+ * symbol_conf__defaults instead.
+ */
+struct symbol_conf {
+       /* copied into symbol__priv_size */
+       unsigned short  priv_size;
+       /*
+        * try_vmlinux_path: build the list of candidate vmlinux locations
+        * use_modules:      also create maps for the modules in use
+        */
+       bool            try_vmlinux_path,
+                       use_modules;
+       /* name for the kernel DSO; NULL selects "[kernel.kallsyms]" */
+       const char      *vmlinux_name;
+};
+
 extern unsigned int symbol__priv_size;
 
 static inline void *symbol__priv(struct symbol *self)
@@ -56,16 +63,27 @@ static inline void *symbol__priv(struct symbol *self)
        return ((void *)self) - symbol__priv_size;
 }
 
+/*
+ * Thread, map, symbol, address and level grouped into one record.
+ * NOTE(review): presumably the result filled in by
+ * thread__find_addr_location(), which takes a pointer to one of these;
+ * the implementation is not visible here, so confirm the meaning of
+ * each field (in particular 'level') against it.
+ */
+struct addr_location {
+       struct thread *thread;
+       struct map    *map;
+       struct symbol *sym;
+       u64           addr;
+       char          level;
+};
+
 struct dso {
        struct list_head node;
-       struct rb_root   syms;
-       struct symbol    *(*find_symbol)(struct dso *, u64 ip);
+       struct rb_root   symbols[MAP__NR_TYPES];
+       struct symbol    *(*find_symbol)(struct dso *self,
+                                        enum map_type type, u64 addr);
        u8               adjust_symbols:1;
        u8               slen_calculated:1;
-       u8               loaded:1;
        u8               has_build_id:1;
+       u8               kernel:1;
        unsigned char    origin;
+       u8               loaded;
        u8               build_id[BUILD_ID_SIZE];
+       u16              long_name_len;
        const char       *short_name;
        char             *long_name;
        char             name[0];
@@ -74,30 +92,29 @@ struct dso {
 struct dso *dso__new(const char *name);
 void dso__delete(struct dso *self);
 
-struct symbol *dso__find_symbol(struct dso *self, u64 ip);
+bool dso__loaded(const struct dso *self, enum map_type type);
 
-int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, int modules);
 struct dso *dsos__findnew(const char *name);
 int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
 void dsos__fprintf(FILE *fp);
 size_t dsos__fprintf_buildid(FILE *fp);
 
 size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
-size_t dso__fprintf(struct dso *self, FILE *fp);
+size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
 char dso__symtab_origin(const struct dso *self);
 void dso__set_build_id(struct dso *self, void *build_id);
 
 int filename__read_build_id(const char *filename, void *bf, size_t size);
-bool fetch_build_id_table(struct list_head *head);
+int sysfs__read_build_id(const char *filename, void *bf, size_t size);
+bool dsos__read_build_ids(void);
 int build_id__sprintf(u8 *self, int len, char *bf);
 
-int load_kernel(symbol_filter_t filter);
+size_t kernel_maps__fprintf(FILE *fp);
 
-void symbol__init(unsigned int priv_size);
+int symbol__init(struct symbol_conf *conf);
 
-extern struct list_head dsos;
-extern struct map *kernel_map;
+struct thread;
+/*
+ * Declaration only: without 'extern' every file including this header
+ * would carry its own tentative definition of kthread.
+ * NOTE(review): this needs exactly one definition of kthread in a .c
+ * file - confirm it exists.
+ */
+extern struct thread *kthread;
+extern struct list_head dsos__user, dsos__kernel;
 extern struct dso *vdso;
-extern const char *vmlinux_name;
-extern int   modules;
 #endif /* __PERF_SYMBOL */
index 0f6d78c9863ae1fc0a54701c444f303fe65072ad..603f5610861b841cc1a526dc1f1566a0c943d56f 100644 (file)
@@ -9,17 +9,26 @@
 static struct rb_root threads;
 static struct thread *last_match;
 
+/*
+ * Set up an already allocated thread: record @pid, clear the command
+ * name and start every map type with an empty tree and an empty
+ * removed-maps list.
+ */
+void thread__init(struct thread *self, pid_t pid)
+{
+       int type;
+
+       self->comm = NULL;
+       self->pid  = pid;
+
+       for (type = 0; type < MAP__NR_TYPES; type++) {
+               INIT_LIST_HEAD(&self->removed_maps[type]);
+               self->maps[type] = RB_ROOT;
+       }
+}
+
 static struct thread *thread__new(pid_t pid)
 {
-       struct thread *self = calloc(1, sizeof(*self));
+       struct thread *self = zalloc(sizeof(*self));
 
        if (self != NULL) {
-               self->pid = pid;
+               thread__init(self, pid);
                self->comm = malloc(32);
                if (self->comm)
                        snprintf(self->comm, 32, ":%d", self->pid);
-               self->maps = RB_ROOT;
-               INIT_LIST_HEAD(&self->removed_maps);
        }
 
        return self;
@@ -44,24 +53,68 @@ int thread__comm_len(struct thread *self)
        return self->comm_len;
 }
 
-static size_t thread__fprintf(struct thread *self, FILE *fp)
+/*
+ * Heading for each map type; __thread__fprintf_maps() prints it ahead of
+ * the maps of that type.
+ */
+static const char *map_type__name[MAP__NR_TYPES] = {
+       [MAP__FUNCTION] = "Functions",
+};
+
+static size_t __thread__fprintf_maps(struct thread *self,
+                                    enum map_type type, FILE *fp)
 {
+       size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
        struct rb_node *nd;
-       struct map *pos;
-       size_t ret = fprintf(fp, "Thread %d %s\nCurrent maps:\n",
-                            self->pid, self->comm);
 
-       for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) {
-               pos = rb_entry(nd, struct map, rb_node);
-               ret += map__fprintf(pos, fp);
+       for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
+               struct map *pos = rb_entry(nd, struct map, rb_node);
+               printed += fprintf(fp, "Map:");
+               printed += map__fprintf(pos, fp);
+               if (verbose > 1) {
+                       printed += dso__fprintf(pos->dso, type, fp);
+                       printed += fprintf(fp, "--\n");
+               }
        }
 
-       ret = fprintf(fp, "Removed maps:\n");
+       return printed;
+}
 
-       list_for_each_entry(pos, &self->removed_maps, node)
-               ret += map__fprintf(pos, fp);
+/*
+ * Print the current maps of @self to @fp, one map type after the other.
+ * Returns the sum of what the per-type helper reports as printed.
+ */
+size_t thread__fprintf_maps(struct thread *self, FILE *fp)
+{
+       size_t type, total = 0;
+
+       for (type = 0; type < MAP__NR_TYPES; type++)
+               total += __thread__fprintf_maps(self, type, fp);
+
+       return total;
+}
 
-       return ret;
+/*
+ * Print the maps of @type that sit on the removed-maps list of @self.
+ * Returns the accumulated return values of the print calls.
+ */
+static size_t __thread__fprintf_removed_maps(struct thread *self,
+                                            enum map_type type, FILE *fp)
+{
+       size_t total = 0;
+       struct map *map;
+
+       list_for_each_entry(map, &self->removed_maps[type], node) {
+               total += fprintf(fp, "Map:");
+               total += map__fprintf(map, fp);
+               if (verbose <= 1)
+                       continue;
+               /* verbose > 1: follow each map with the dump of its DSO */
+               total += dso__fprintf(map->dso, type, fp);
+               total += fprintf(fp, "--\n");
+       }
+
+       return total;
+}
+
+/*
+ * Print the removed maps of @self to @fp, one map type after the other.
+ * Returns the sum of what the per-type helper reports as printed.
+ */
+static size_t thread__fprintf_removed_maps(struct thread *self, FILE *fp)
+{
+       size_t type, total = 0;
+
+       for (type = 0; type < MAP__NR_TYPES; type++)
+               total += __thread__fprintf_removed_maps(self, type, fp);
+
+       return total;
+}
+
+/*
+ * Print a "Thread <pid> <comm>" header, the current maps of @self and
+ * then, under a "Removed maps:" heading, the maps on its removed lists.
+ * Returns the accumulated return values of the print calls.
+ */
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+       size_t printed = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+       /* current maps first; the removed ones belong below their heading */
+       printed += thread__fprintf_maps(self, fp);
+       printed += fprintf(fp, "Removed maps:\n");
+       return printed + thread__fprintf_removed_maps(self, fp);
 }
 
 struct thread *threads__findnew(pid_t pid)
@@ -117,7 +170,8 @@ struct thread *register_idle_thread(void)
 
 static void thread__remove_overlappings(struct thread *self, struct map *map)
 {
-       struct rb_node *next = rb_first(&self->maps);
+       struct rb_root *root = &self->maps[map->type];
+       struct rb_node *next = rb_first(root);
 
        while (next) {
                struct map *pos = rb_entry(next, struct map, rb_node);
@@ -132,13 +186,13 @@ static void thread__remove_overlappings(struct thread *self, struct map *map)
                        map__fprintf(pos, stderr);
                }
 
-               rb_erase(&pos->rb_node, &self->maps);
+               rb_erase(&pos->rb_node, root);
                /*
                 * We may have references to this map, for instance in some
                 * hist_entry instances, so just move them to a separate
                 * list.
                 */
-               list_add_tail(&pos->node, &self->removed_maps);
+               list_add_tail(&pos->node, &self->removed_maps[map->type]);
        }
 }
 
@@ -185,12 +239,26 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
 void thread__insert_map(struct thread *self, struct map *map)
 {
        thread__remove_overlappings(self, map);
-       maps__insert(&self->maps, map);
+       maps__insert(&self->maps[map->type], map);
 }
 
-int thread__fork(struct thread *self, struct thread *parent)
+/*
+ * Clone every map of @type found in @parent and insert the copies into
+ * @self.  Returns 0 on success, or -ENOMEM as soon as map__clone() fails;
+ * copies inserted before the failure stay in @self.
+ */
+static int thread__clone_maps(struct thread *self, struct thread *parent,
+                             enum map_type type)
 {
        struct rb_node *nd;
+       for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
+               struct map *map = rb_entry(nd, struct map, rb_node);
+               struct map *new = map__clone(map);
+               if (new == NULL)
+                       return -ENOMEM;
+               thread__insert_map(self, new);
+       }
+       return 0;
+}
+
+int thread__fork(struct thread *self, struct thread *parent)
+{
+       int i;
 
        if (self->comm)
                free(self->comm);
@@ -198,14 +266,9 @@ int thread__fork(struct thread *self, struct thread *parent)
        if (!self->comm)
                return -ENOMEM;
 
-       for (nd = rb_first(&parent->maps); nd; nd = rb_next(nd)) {
-               struct map *map = rb_entry(nd, struct map, rb_node);
-               struct map *new = map__clone(map);
-               if (!new)
+       for (i = 0; i < MAP__NR_TYPES; ++i)
+               if (thread__clone_maps(self, parent, i) < 0)
                        return -ENOMEM;
-               thread__insert_map(self, new);
-       }
-
        return 0;
 }
 
@@ -222,3 +285,15 @@ size_t threads__fprintf(FILE *fp)
 
        return ret;
 }
+
+/*
+ * Find the map of @type in @self that covers @addr and look the symbol up
+ * in it.  Returns NULL when no map covers the address.
+ */
+struct symbol *thread__find_symbol(struct thread *self,
+                                  enum map_type type, u64 addr,
+                                  symbol_filter_t filter)
+{
+       struct map *map = thread__find_map(self, type, addr);
+
+       if (map == NULL)
+               return NULL;
+
+       /* the address goes through the map's own map_ip() before the lookup */
+       return map__find_symbol(map, map->map_ip(map, addr), filter);
+}
index 53addd77ce8f2f1c800f9471b8d11f4ce8a6fddb..686d6e914d9e3711bfb1ea8e53e6b275715f2837 100644 (file)
@@ -7,31 +7,50 @@
 
 struct thread {
        struct rb_node          rb_node;
-       struct rb_root          maps;
-       struct list_head        removed_maps;
+       struct rb_root          maps[MAP__NR_TYPES];
+       struct list_head        removed_maps[MAP__NR_TYPES];
        pid_t                   pid;
+       bool                    use_modules;
        char                    shortname[3];
        char                    *comm;
        int                     comm_len;
 };
 
+void thread__init(struct thread *self, pid_t pid);
 int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
 struct thread *threads__findnew(pid_t pid);
 struct thread *register_idle_thread(void);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent);
+size_t thread__fprintf_maps(struct thread *self, FILE *fp);
 size_t threads__fprintf(FILE *fp);
 
 void maps__insert(struct rb_root *maps, struct map *map);
-struct map *maps__find(struct rb_root *maps, u64 ip);
+struct map *maps__find(struct rb_root *maps, u64 addr);
 
-struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp);
-struct map *kernel_maps__find_by_dso_name(const char *name);
+/*
+ * Look up the map of @type covering @addr in @self.  A NULL @self is
+ * accepted and yields NULL.
+ */
+static inline struct map *thread__find_map(struct thread *self,
+                                          enum map_type type, u64 addr)
+{
+       if (!self)
+               return NULL;
+
+       return maps__find(&self->maps[type], addr);
+}
 
-static inline struct map *thread__find_map(struct thread *self, u64 ip)
+static inline void __thread__insert_map(struct thread *self, struct map *map)
 {
-       return self ? maps__find(&self->maps, ip) : NULL;
+        maps__insert(&self->maps[map->type], map);
 }
 
+void thread__find_addr_location(struct thread *self, u8 cpumode,
+                               enum map_type type, u64 addr,
+                               struct addr_location *al,
+                               symbol_filter_t filter);
+struct symbol *thread__find_symbol(struct thread *self,
+                                  enum map_type type, u64 addr,
+                                  symbol_filter_t filter);
+
+/* Shorthand for thread__find_symbol() restricted to MAP__FUNCTION maps. */
+static inline struct symbol *
+thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter)
+{
+       return thread__find_symbol(self, MAP__FUNCTION, addr, filter);
+}
 #endif /* __PERF_THREAD_H */
index 831052d4b4fbef86482af2e73eda14b458093ea5..cace35595530a1dedeb283b0adb651a3af32cd4c 100644 (file)
 #include <ctype.h>
 #include <errno.h>
 #include <stdbool.h>
+#include <linux/kernel.h>
 
 #include "../perf.h"
 #include "trace-event.h"
 
-
 #define VERSION "0.5"
 
 #define _STR(x) #x
@@ -483,23 +483,31 @@ static struct tracepoint_path *
 get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
 {
        struct tracepoint_path path, *ppath = &path;
-       int i;
+       int i, nr_tracepoints = 0;
 
        for (i = 0; i < nb_events; i++) {
                if (pattrs[i].type != PERF_TYPE_TRACEPOINT)
                        continue;
+               ++nr_tracepoints;
                ppath->next = tracepoint_id_to_path(pattrs[i].config);
                if (!ppath->next)
                        die("%s\n", "No memory to alloc tracepoints list");
                ppath = ppath->next;
        }
 
-       return path.next;
+       return nr_tracepoints > 0 ? path.next : NULL;
 }
-void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
+
+int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
 {
        char buf[BUFSIZ];
-       struct tracepoint_path *tps;
+       struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events);
+
+       /*
+        * What? No tracepoints? No sense writing anything here, bail out.
+        */
+       if (tps == NULL)
+               return -1;
 
        output_fd = fd;
 
@@ -528,11 +536,11 @@ void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
        page_size = getpagesize();
        write_or_die(&page_size, 4);
 
-       tps = get_tracepoints_path(pattrs, nb_events);
-
        read_header_files();
        read_ftrace_files(tps);
        read_event_files(tps);
        read_proc_kallsyms();
        read_ftrace_printk();
+
+       return 0;
 }
index eae56050308677fcba7209811be99fdd0157c963..0302405aa2ca7b0f6e7b77cde32c4b49462ff8c9 100644 (file)
@@ -48,6 +48,11 @@ static unsigned long long input_buf_siz;
 
 static int cpus;
 static int long_size;
+static int is_flag_field;
+static int is_symbolic_field;
+
+static struct format_field *
+find_any_field(struct event *event, const char *name);
 
 static void init_input_buf(char *buf, unsigned long long size)
 {
@@ -941,7 +946,8 @@ static int event_read_fields(struct event *event, struct format_field **fields)
                        if (read_expect_type(EVENT_ITEM, &token))
                                goto fail;
 
-                       /* add signed type */
+                       if (strtoul(token, NULL, 0))
+                               field->flags |= FIELD_IS_SIGNED;
 
                        free_token(token);
                        if (read_expected(EVENT_OP, ";") < 0)
@@ -1300,6 +1306,16 @@ process_entry(struct event *event __unused, struct print_arg *arg,
        arg->type = PRINT_FIELD;
        arg->field.name = field;
 
+       /*
+        * A preceding __print_flags/__print_symbolic raised one of these
+        * flags: tag the field being referenced accordingly.  The lookup
+        * result is checked before use so that a name that cannot be
+        * resolved does not lead to a NULL dereference; the pending flag
+        * is consumed either way.
+        */
+       if (is_flag_field) {
+               arg->field.field = find_any_field(event, arg->field.name);
+               if (arg->field.field)
+                       arg->field.field->flags |= FIELD_IS_FLAG;
+               is_flag_field = 0;
+       } else if (is_symbolic_field) {
+               arg->field.field = find_any_field(event, arg->field.name);
+               if (arg->field.field)
+                       arg->field.field->flags |= FIELD_IS_SYMBOLIC;
+               is_symbolic_field = 0;
+       }
+
        type = read_token(&token);
        *tok = token;
 
@@ -1667,9 +1683,11 @@ process_arg_token(struct event *event, struct print_arg *arg,
                        type = process_entry(event, arg, &token);
                } else if (strcmp(token, "__print_flags") == 0) {
                        free_token(token);
+                       is_flag_field = 1;
                        type = process_flags(event, arg, &token);
                } else if (strcmp(token, "__print_symbolic") == 0) {
                        free_token(token);
+                       is_symbolic_field = 1;
                        type = process_symbols(event, arg, &token);
                } else if (strcmp(token, "__get_str") == 0) {
                        free_token(token);
@@ -1870,7 +1888,7 @@ find_any_field(struct event *event, const char *name)
        return find_field(event, name);
 }
 
-static unsigned long long read_size(void *ptr, int size)
+unsigned long long read_size(void *ptr, int size)
 {
        switch (size) {
        case 1:
@@ -1955,7 +1973,7 @@ int trace_parse_common_type(void *data)
                              "common_type");
 }
 
-static int parse_common_pid(void *data)
+int trace_parse_common_pid(void *data)
 {
        static int pid_offset;
        static int pid_size;
@@ -1964,7 +1982,7 @@ static int parse_common_pid(void *data)
                              "common_pid");
 }
 
-static int parse_common_pc(void *data)
+int parse_common_pc(void *data)
 {
        static int pc_offset;
        static int pc_size;
@@ -1973,7 +1991,7 @@ static int parse_common_pc(void *data)
                              "common_preempt_count");
 }
 
-static int parse_common_flags(void *data)
+int parse_common_flags(void *data)
 {
        static int flags_offset;
        static int flags_size;
@@ -1982,7 +2000,7 @@ static int parse_common_flags(void *data)
                              "common_flags");
 }
 
-static int parse_common_lock_depth(void *data)
+int parse_common_lock_depth(void *data)
 {
        static int ld_offset;
        static int ld_size;
@@ -2007,6 +2025,14 @@ struct event *trace_find_event(int id)
        return event;
 }
 
+/*
+ * Step through the event list: a NULL @event yields the head of
+ * event_list, anything else yields the entry following @event.
+ */
+struct event *trace_find_next_event(struct event *event)
+{
+       return event ? event->next : event_list;
+}
+
 static unsigned long long eval_num_arg(void *data, int size,
                                   struct event *event, struct print_arg *arg)
 {
@@ -2146,7 +2172,7 @@ static const struct flag flags[] = {
        { "HRTIMER_RESTART", 1 },
 };
 
-static unsigned long long eval_flag(const char *flag)
+unsigned long long eval_flag(const char *flag)
 {
        int i;
 
@@ -2676,7 +2702,7 @@ get_return_for_leaf(int cpu, int cur_pid, unsigned long long cur_func,
        if (!(event->flags & EVENT_FL_ISFUNCRET))
                return NULL;
 
-       pid = parse_common_pid(next->data);
+       pid = trace_parse_common_pid(next->data);
        field = find_field(event, "func");
        if (!field)
                die("function return does not have field func");
@@ -2962,7 +2988,7 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs,
                return;
        }
 
-       pid = parse_common_pid(data);
+       pid = trace_parse_common_pid(data);
 
        if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET))
                return pretty_print_func_graph(data, size, event, cpu,
diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c
new file mode 100644 (file)
index 0000000..51e833f
--- /dev/null
@@ -0,0 +1,598 @@
+/*
+ * trace-event-perl.  Feed perf trace events to an embedded Perl interpreter.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "trace-event.h"
+#include "trace-event-perl.h"
+
+void xs_init(pTHX);
+
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
+
+/*
+ * Register the XS entry points Perf::Trace::Context::bootstrap and
+ * DynaLoader::boot_DynaLoader with the interpreter, both attributed to
+ * this source file.
+ * NOTE(review): presumably handed to perl_parse() as its xs_init hook -
+ * the caller is not visible here.
+ */
+void xs_init(pTHX)
+{
+       const char *file = __FILE__;
+       dXSUB_SYS;
+
+       newXS("Perf::Trace::Context::bootstrap", boot_Perf__Trace__Context,
+             file);
+       newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file);
+}
+
+INTERP my_perl;
+
+#define FTRACE_MAX_EVENT                               \
+       ((1 << (sizeof(unsigned short) * 8)) - 1)
+
+struct event *events[FTRACE_MAX_EVENT];
+
+static struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+/*
+ * Tell the script about one value of a symbolic field: @field_value is
+ * converted with eval_flag() and (event name, field name, numeric value,
+ * display string) are passed to the Perl sub main::define_symbolic_value.
+ * The call is skipped when get_cv() does not find that sub.
+ */
+static void define_symbolic_value(const char *ev_name,
+                                 const char *field_name,
+                                 const char *field_value,
+                                 const char *field_str)
+{
+       unsigned long long value;
+       dSP;
+
+       value = eval_flag(field_value);
+
+       ENTER;
+       SAVETMPS;
+       PUSHMARK(SP);
+
+       /* arguments are pushed as mortal SVs */
+       XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+       XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+       XPUSHs(sv_2mortal(newSVuv(value)));
+       XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+       PUTBACK;
+       if (get_cv("main::define_symbolic_value", 0))
+               call_pv("main::define_symbolic_value", G_SCALAR);
+       SPAGAIN;
+       PUTBACK;
+       FREETMPS;
+       LEAVE;
+}
+
+/*
+ * Report every entry of the list starting at @field through
+ * define_symbolic_value().  The first entry is used unconditionally, so
+ * @field must not be NULL.
+ */
+static void define_symbolic_values(struct print_flag_sym *field,
+                                  const char *ev_name,
+                                  const char *field_name)
+{
+       do {
+               define_symbolic_value(ev_name, field_name, field->value,
+                                     field->str);
+               field = field->next;
+       } while (field);
+}
+
+/*
+ * Tell the script that @field_name of event @ev_name is a symbolic field
+ * by passing both names to the Perl sub main::define_symbolic_field.
+ * The call is skipped when get_cv() does not find that sub.
+ */
+static void define_symbolic_field(const char *ev_name,
+                                 const char *field_name)
+{
+       dSP;
+
+       ENTER;
+       SAVETMPS;
+       PUSHMARK(SP);
+
+       XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+       XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+
+       PUTBACK;
+       if (get_cv("main::define_symbolic_field", 0))
+               call_pv("main::define_symbolic_field", G_SCALAR);
+       SPAGAIN;
+       PUTBACK;
+       FREETMPS;
+       LEAVE;
+}
+
+/*
+ * Tell the script about one value of a flag field: @field_value is
+ * converted with eval_flag() and (event name, field name, numeric value,
+ * display string) are passed to the Perl sub main::define_flag_value.
+ * The call is skipped when get_cv() does not find that sub.
+ */
+static void define_flag_value(const char *ev_name,
+                             const char *field_name,
+                             const char *field_value,
+                             const char *field_str)
+{
+       unsigned long long value;
+       dSP;
+
+       value = eval_flag(field_value);
+
+       ENTER;
+       SAVETMPS;
+       PUSHMARK(SP);
+
+       /* arguments are pushed as mortal SVs */
+       XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+       XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+       XPUSHs(sv_2mortal(newSVuv(value)));
+       XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+       PUTBACK;
+       if (get_cv("main::define_flag_value", 0))
+               call_pv("main::define_flag_value", G_SCALAR);
+       SPAGAIN;
+       PUTBACK;
+       FREETMPS;
+       LEAVE;
+}
+
+/*
+ * Report every entry of the list starting at @field through
+ * define_flag_value().  The first entry is used unconditionally, so
+ * @field must not be NULL.
+ */
+static void define_flag_values(struct print_flag_sym *field,
+                              const char *ev_name,
+                              const char *field_name)
+{
+       do {
+               define_flag_value(ev_name, field_name, field->value,
+                                 field->str);
+               field = field->next;
+       } while (field);
+}
+
+/*
+ * Tell the script that @field_name of event @ev_name is a flag field
+ * whose values are joined with @delim, by passing the three strings to
+ * the Perl sub main::define_flag_field.  The call is skipped when
+ * get_cv() does not find that sub.
+ */
+static void define_flag_field(const char *ev_name,
+                             const char *field_name,
+                             const char *delim)
+{
+       dSP;
+
+       ENTER;
+       SAVETMPS;
+       PUSHMARK(SP);
+
+       XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+       XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+       XPUSHs(sv_2mortal(newSVpv(delim, 0)));
+
+       PUTBACK;
+       if (get_cv("main::define_flag_field", 0))
+               call_pv("main::define_flag_field", G_SCALAR);
+       SPAGAIN;
+       PUTBACK;
+       FREETMPS;
+       LEAVE;
+}
+
+/*
+ * Walk the print-format argument tree of @event and report its flag and
+ * symbolic fields to the script.  Nested arguments are visited by
+ * recursion and siblings through args->next.  The name of the field seen
+ * most recently is kept in the file-level cur_field_name.
+ */
+static void define_event_symbols(struct event *event,
+                                const char *ev_name,
+                                struct print_arg *args)
+{
+       switch (args->type) {
+       case PRINT_NULL:
+               break;
+       case PRINT_ATOM:
+               /* a bare atom is reported as the string for flag value "0" */
+               define_flag_value(ev_name, cur_field_name, "0",
+                                 args->atom.atom);
+               zero_flag_atom = 0;
+               break;
+       case PRINT_FIELD:
+               /* remember a private copy of the field name for later cases */
+               if (cur_field_name)
+                       free(cur_field_name);
+               cur_field_name = strdup(args->field.name);
+               break;
+       case PRINT_FLAGS:
+               /*
+                * The field argument is visited first; presumably it is a
+                * PRINT_FIELD and so sets cur_field_name for the two calls
+                * that follow - TODO confirm.
+                */
+               define_event_symbols(event, ev_name, args->flags.field);
+               define_flag_field(ev_name, cur_field_name, args->flags.delim);
+               define_flag_values(args->flags.flags, ev_name, cur_field_name);
+               break;
+       case PRINT_SYMBOL:
+               /* same ordering as PRINT_FLAGS: field first, then its values */
+               define_event_symbols(event, ev_name, args->symbol.field);
+               define_symbolic_field(ev_name, cur_field_name);
+               define_symbolic_values(args->symbol.symbols, ev_name,
+                                      cur_field_name);
+               break;
+       case PRINT_STRING:
+               break;
+       case PRINT_TYPE:
+               define_event_symbols(event, ev_name, args->typecast.item);
+               break;
+       case PRINT_OP:
+               /* zero_flag_atom is raised for ":" and cleared by PRINT_ATOM */
+               if (strcmp(args->op.op, ":") == 0)
+                       zero_flag_atom = 1;
+               define_event_symbols(event, ev_name, args->op.left);
+               define_event_symbols(event, ev_name, args->op.right);
+               break;
+       default:
+               /* we should warn... */
+               /* note: returning here also skips args->next */
+               return;
+       }
+
+       if (args->next)
+               define_event_symbols(event, ev_name, args->next);
+}
+
+/*
+ * Return the event description for the given type id, looking it up with
+ * trace_find_event() the first time and caching the result in events[].
+ *
+ * On the first successful lookup the event's flag/symbolic fields are
+ * announced via define_event_symbols() under the name "<system>::<name>".
+ * Returns NULL when trace_find_event() knows no such event.
+ *
+ * NOTE(review): type is used to index events[] without a range check; the
+ * size of events[] is not visible here - confirm callers keep it in range.
+ */
+static inline struct event *find_cache_event(int type)
+{
+       static char ev_name[256];
+       struct event *event;
+
+       if (events[type])
+               return events[type];
+
+       events[type] = event = trace_find_event(type);
+       if (!event)
+               return NULL;
+
+       /* bounded: system/name lengths are not limited by anything seen here */
+       snprintf(ev_name, sizeof(ev_name), "%s::%s",
+                event->system, event->name);
+
+       define_event_symbols(event, ev_name, event->print_fmt.args);
+
+       return event;
+}
+
+/* Return parse_common_pc() applied to the event record held in context. */
+int common_pc(struct scripting_context *context)
+{
+       return parse_common_pc(context->event_data);
+}
+
+/* Return parse_common_flags() applied to the event record held in context. */
+int common_flags(struct scripting_context *context)
+{
+       return parse_common_flags(context->event_data);
+}
+
+/*
+ * Return parse_common_lock_depth() applied to the event record held in
+ * context.
+ */
+int common_lock_depth(struct scripting_context *context)
+{
+       return parse_common_lock_depth(context->event_data);
+}
+
+/*
+ * Dispatch one trace record to the Perl script.
+ *
+ * The Perl sub named "<system>::<name>" is called if it exists, otherwise
+ * main::trace_unhandled is called if that exists.  Either one receives, in
+ * order: the handler name, the scripting context (as an integer), cpu,
+ * seconds, nanoseconds, pid, comm, and then one value per event field
+ * (strings as Perl strings, everything else as signed/unsigned integers).
+ *
+ * @cpu:   cpu number passed through to the script
+ * @data:  raw event record; also stored in scripting_context->event_data
+ * @size:  unused
+ * @nsecs: timestamp, split into seconds and nanoseconds for the script
+ * @comm:  command name passed through to the script
+ *
+ * Calls die() when no event description exists for the record's type.
+ */
+static void perl_process_event(int cpu, void *data,
+                              int size __attribute((unused)),
+                              unsigned long long nsecs, char *comm)
+{
+       struct format_field *field;
+       static char handler[256];
+       unsigned long long val;
+       unsigned long s, ns;
+       struct event *event;
+       int type;
+       int pid;
+
+       dSP;
+
+       type = trace_parse_common_type(data);
+
+       event = find_cache_event(type);
+       if (!event)
+               die("ug! no event found for type %d", type);
+
+       pid = trace_parse_common_pid(data);
+
+       /* bounded: system/name lengths are not limited by anything seen here */
+       snprintf(handler, sizeof(handler), "%s::%s",
+                event->system, event->name);
+
+       s = nsecs / NSECS_PER_SEC;
+       ns = nsecs - s * NSECS_PER_SEC;
+
+       scripting_context->event_data = data;
+
+       ENTER;
+       SAVETMPS;
+       PUSHMARK(SP);
+
+       XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+       XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+       XPUSHs(sv_2mortal(newSVuv(cpu)));
+       XPUSHs(sv_2mortal(newSVuv(s)));
+       XPUSHs(sv_2mortal(newSVuv(ns)));
+       XPUSHs(sv_2mortal(newSViv(pid)));
+       XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+
+       /* common fields other than pid can be accessed via xsub fns */
+
+       for (field = event->format.fields; field; field = field->next) {
+               if (field->flags & FIELD_IS_STRING) {
+                       int offset;
+                       if (field->flags & FIELD_IS_DYNAMIC) {
+                               /*
+                                * The low 16 bits of the word stored at
+                                * field->offset give the string's offset
+                                * within the record.
+                                */
+                               offset = *(int *)(data + field->offset);
+                               offset &= 0xffff;
+                       } else {
+                               offset = field->offset;
+                       }
+                       XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
+               } else { /* FIELD_IS_NUMERIC */
+                       val = read_size(data + field->offset, field->size);
+                       if (field->flags & FIELD_IS_SIGNED) {
+                               XPUSHs(sv_2mortal(newSViv(val)));
+                       } else {
+                               XPUSHs(sv_2mortal(newSVuv(val)));
+                       }
+               }
+       }
+
+       /* publish the argument list; anything pushed later is not passed */
+       PUTBACK;
+
+       /*
+        * trace_unhandled gets the same argument list as a real handler.
+        * Pushing a second set of arguments here, after PUTBACK, would never
+        * reach the callee because the local stack pointer is not published
+        * again before call_pv().
+        */
+       if (get_cv(handler, 0))
+               call_pv(handler, G_SCALAR);
+       else if (get_cv("main::trace_unhandled", 0))
+               call_pv("main::trace_unhandled", G_SCALAR);
+
+       SPAGAIN;
+       PUTBACK;
+       FREETMPS;
+       LEAVE;
+}
+
+/*
+ * Call the script's main::trace_begin sub, without arguments and discarding
+ * its result, if the script defines one.
+ */
+static void run_start_sub(void)
+{
+       dSP; /* access to Perl stack */
+
+       if (get_cv("main::trace_begin", 0)) {
+               /* only push a mark when a call will actually consume it */
+               PUSHMARK(SP);
+               call_pv("main::trace_begin", G_DISCARD | G_NOARGS);
+       }
+}
+
+/*
+ * Start trace script
+ *
+ * Creates the Perl interpreter in my_perl, parses and runs the given script
+ * and then invokes its optional trace_begin sub.  Returns 0 on success and
+ * -1 when parsing fails or running the script leaves an error in ERRSV.
+ */
+static int perl_start_script(const char *script)
+{
+       /* argv for the interpreter: empty program name, then the script */
+       const char *command_line[2] = { "", script };
+
+       my_perl = perl_alloc();
+       perl_construct(my_perl);
+
+       if (perl_parse(my_perl, xs_init, 2, (char **)command_line,
+                      (char **)NULL) != 0)
+               return -1;
+
+       perl_run(my_perl);
+       if (SvTRUE(ERRSV))
+               return -1;
+
+       run_start_sub();
+
+       fprintf(stderr, "perf trace started with Perl script %s\n\n", script);
+
+       return 0;
+}
+
+/*
+ * Stop trace script
+ *
+ * Calls the script's main::trace_end sub (no arguments, result discarded)
+ * if it is defined, then destroys and frees the interpreter in my_perl.
+ * Always returns 0.
+ */
+static int perl_stop_script(void)
+{
+       dSP; /* access to Perl stack */
+
+       if (get_cv("main::trace_end", 0)) {
+               /* only push a mark when a call will actually consume it */
+               PUSHMARK(SP);
+               call_pv("main::trace_end", G_DISCARD | G_NOARGS);
+       }
+
+       perl_destruct(my_perl);
+       perl_free(my_perl);
+
+       fprintf(stderr, "\nperf trace Perl script stopped\n");
+
+       return 0;
+}
+
+/*
+ * Write a skeleton Perl script to "<outfile>.pl".
+ *
+ * The script contains empty trace_begin/trace_end subs, one handler sub per
+ * event returned by trace_find_next_event() that prints every field of the
+ * event, a trace_unhandled sub and the print_header helper they all use.
+ *
+ * Returns 0 on success, -1 if the file name does not fit in PATH_MAX, the
+ * file cannot be opened, or closing (flushing) it fails.
+ */
+static int perl_generate_script(const char *outfile)
+{
+       struct event *event = NULL;
+       struct format_field *f;
+       char fname[PATH_MAX];
+       int not_first, count;
+       FILE *ofp;
+       int len;
+
+       /* refuse to silently truncate (or overflow) the output file name */
+       len = snprintf(fname, sizeof(fname), "%s.pl", outfile);
+       if (len < 0 || (size_t)len >= sizeof(fname)) {
+               fprintf(stderr, "script name too long: %s\n", outfile);
+               return -1;
+       }
+
+       ofp = fopen(fname, "w");
+       if (ofp == NULL) {
+               fprintf(stderr, "couldn't open %s\n", fname);
+               return -1;
+       }
+
+       fprintf(ofp, "# perf trace event handlers, "
+               "generated by perf trace -g perl\n");
+
+       fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+               " License version 2\n\n");
+
+       fprintf(ofp, "# The common_* event handler fields are the most useful "
+               "fields common to\n");
+
+       fprintf(ofp, "# all events.  They don't necessarily correspond to "
+               "the 'common_*' fields\n");
+
+       fprintf(ofp, "# in the format files.  Those fields not available as "
+               "handler params can\n");
+
+       fprintf(ofp, "# be retrieved using Perl functions of the form "
+               "common_*($context).\n");
+
+       fprintf(ofp, "# See Context.pm for the list of available "
+               "functions.\n\n");
+
+       fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/"
+               "Perf-Trace-Util/lib\";\n");
+
+       fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n");
+       fprintf(ofp, "use Perf::Trace::Core;\n");
+       fprintf(ofp, "use Perf::Trace::Context;\n");
+       fprintf(ofp, "use Perf::Trace::Util;\n\n");
+
+       fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
+       fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n");
+
+       while ((event = trace_find_next_event(event))) {
+               /* handler sub header and its parameter list */
+               fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
+               fprintf(ofp, "\tmy (");
+
+               fprintf(ofp, "$event_name, ");
+               fprintf(ofp, "$context, ");
+               fprintf(ofp, "$common_cpu, ");
+               fprintf(ofp, "$common_secs, ");
+               fprintf(ofp, "$common_nsecs,\n");
+               fprintf(ofp, "\t    $common_pid, ");
+               fprintf(ofp, "$common_comm,\n\t    ");
+
+               not_first = 0;
+               count = 0;
+
+               for (f = event->format.fields; f; f = f->next) {
+                       if (not_first++)
+                               fprintf(ofp, ", ");
+                       if (++count % 5 == 0)
+                               fprintf(ofp, "\n\t    ");
+
+                       fprintf(ofp, "$%s", f->name);
+               }
+               fprintf(ofp, ") = @_;\n\n");
+
+               fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+                       "$common_secs, $common_nsecs,\n\t             "
+                       "$common_pid, $common_comm);\n\n");
+
+               /* printf format string: one "name=%x" item per field */
+               fprintf(ofp, "\tprintf(\"");
+
+               not_first = 0;
+               count = 0;
+
+               for (f = event->format.fields; f; f = f->next) {
+                       if (not_first++)
+                               fprintf(ofp, ", ");
+                       if (count && count % 4 == 0) {
+                               fprintf(ofp, "\".\n\t       \"");
+                       }
+                       count++;
+
+                       fprintf(ofp, "%s=", f->name);
+                       if (f->flags & FIELD_IS_STRING ||
+                           f->flags & FIELD_IS_FLAG ||
+                           f->flags & FIELD_IS_SYMBOLIC)
+                               fprintf(ofp, "%%s");
+                       else if (f->flags & FIELD_IS_SIGNED)
+                               fprintf(ofp, "%%d");
+                       else
+                               fprintf(ofp, "%%u");
+               }
+
+               fprintf(ofp, "\\n\",\n\t       ");
+
+               /* printf arguments: flag/symbolic fields go through helpers */
+               not_first = 0;
+               count = 0;
+
+               for (f = event->format.fields; f; f = f->next) {
+                       if (not_first++)
+                               fprintf(ofp, ", ");
+
+                       if (++count % 5 == 0)
+                               fprintf(ofp, "\n\t       ");
+
+                       if (f->flags & FIELD_IS_FLAG) {
+                               if ((count - 1) % 5 != 0) {
+                                       fprintf(ofp, "\n\t       ");
+                                       count = 4;
+                               }
+                               fprintf(ofp, "flag_str(\"");
+                               fprintf(ofp, "%s::%s\", ", event->system,
+                                       event->name);
+                               fprintf(ofp, "\"%s\", $%s)", f->name,
+                                       f->name);
+                       } else if (f->flags & FIELD_IS_SYMBOLIC) {
+                               if ((count - 1) % 5 != 0) {
+                                       fprintf(ofp, "\n\t       ");
+                                       count = 4;
+                               }
+                               fprintf(ofp, "symbol_str(\"");
+                               fprintf(ofp, "%s::%s\", ", event->system,
+                                       event->name);
+                               fprintf(ofp, "\"%s\", $%s)", f->name,
+                                       f->name);
+                       } else
+                               fprintf(ofp, "$%s", f->name);
+               }
+
+               fprintf(ofp, ");\n");
+               fprintf(ofp, "}\n\n");
+       }
+
+       fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
+               "$common_cpu, $common_secs, $common_nsecs,\n\t    "
+               "$common_pid, $common_comm) = @_;\n\n");
+
+       fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+               "$common_secs, $common_nsecs,\n\t             $common_pid, "
+               "$common_comm);\n}\n\n");
+
+       fprintf(ofp, "sub print_header\n{\n"
+               "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
+               "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t       "
+               "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}");
+
+       /* fclose() flushes; a failure here means the script is incomplete */
+       if (fclose(ofp) != 0) {
+               fprintf(stderr, "couldn't write %s\n", fname);
+               return -1;
+       }
+
+       fprintf(stderr, "generated Perl script: %s\n", fname);
+
+       return 0;
+}
+
+/*
+ * Scripting backend description for Perl: binds the perl_* functions
+ * defined above to the generic scripting_ops callbacks.
+ */
+struct scripting_ops perl_scripting_ops = {
+       .name = "Perl",
+       .start_script = perl_start_script,
+       .stop_script = perl_stop_script,
+       .process_event = perl_process_event,
+       .generate_script = perl_generate_script,
+};
+
+/*
+ * setup_perl_scripting - make the Perl backend available.
+ *
+ * Without libperl (NO_LIBPERL) this only prints a hint on how to enable
+ * Perl support.  Otherwise it registers perl_scripting_ops under the
+ * "Perl" and "pl" specs and allocates the shared scripting_context; any
+ * failure is fatal via die().
+ */
+#ifdef NO_LIBPERL
+void setup_perl_scripting(void)
+{
+       fprintf(stderr, "Perl scripting not supported."
+               "  Install libperl and rebuild perf to enable it.  e.g. "
+               "apt-get install libperl-dev (ubuntu), yum install "
+               "perl-ExtUtils-Embed (Fedora), etc.\n");
+}
+#else
+void setup_perl_scripting(void)
+{
+       int err;
+       err = script_spec_register("Perl", &perl_scripting_ops);
+       if (err)
+               die("error registering Perl script extension");
+
+       err = script_spec_register("pl", &perl_scripting_ops);
+       if (err)
+               die("error registering pl script extension");
+
+       /* perl_process_event() dereferences this, so it must not be NULL */
+       scripting_context = malloc(sizeof(struct scripting_context));
+       if (!scripting_context)
+               die("error allocating Perl scripting context");
+}
+#endif
diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h
new file mode 100644 (file)
index 0000000..8fe0d86
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef __PERF_TRACE_EVENT_PERL_H
+#define __PERF_TRACE_EVENT_PERL_H
+/*
+ * Without libperl, provide do-nothing stand-ins for the Perl embedding
+ * macros and functions named below so that code written against them still
+ * compiles.  Stack/scope macros and value constructors expand to nothing;
+ * calls and flags expand to (0).  With libperl the real Perl headers are
+ * included instead.
+ */
+#ifdef NO_LIBPERL
+typedef int INTERP;
+#define dSP
+#define ENTER
+#define SAVETMPS
+#define PUTBACK
+#define SPAGAIN
+#define FREETMPS
+#define LEAVE
+#define SP
+#define ERRSV
+#define G_SCALAR               (0)
+#define G_DISCARD              (0)
+#define G_NOARGS               (0)
+#define PUSHMARK(a)
+#define SvTRUE(a)              (0)
+#define XPUSHs(s)
+#define sv_2mortal(a)
+#define newSVpv(a,b)
+#define newSVuv(a)
+#define newSViv(a)
+#define get_cv(a,b)            (0)
+#define call_pv(a,b)           (0)
+#define perl_alloc()           (0)
+#define perl_construct(a)      (0)
+#define perl_parse(a,b,c,d,e)  (0)
+#define perl_run(a)            (0)
+#define perl_destruct(a)       (0)
+#define perl_free(a)           (0)
+#define pTHX                   void
+#define CV                     void
+#define dXSUB_SYS
+#define pTHX_
+/* stub: accepts the same arguments as a registration call and ignores them */
+static inline void newXS(const char *a, void *b, const char *c) {}
+#else
+#include <EXTERN.h>
+#include <perl.h>
+typedef PerlInterpreter * INTERP;
+#endif
+
+/*
+ * State handed to the script alongside each event so that the common_*()
+ * accessors below can get back at the event being processed.
+ */
+struct scripting_context {
+       void *event_data;       /* event record currently being processed */
+};
+
+/*
+ * Accessors for common event fields; each takes the context and reads the
+ * value from context->event_data.
+ */
+int common_pc(struct scripting_context *context);
+int common_flags(struct scripting_context *context);
+int common_lock_depth(struct scripting_context *context);
+
+#endif /* __PERF_TRACE_EVENT_PERL_H */
index 44292e06cca41557c05b6e66281a6d4ed427f063..342dfdd43f875117a0306cb7cd4bb5f7618f82db 100644 (file)
@@ -471,11 +471,11 @@ void trace_report(int fd)
 
        read_or_die(buf, 3);
        if (memcmp(buf, test, 3) != 0)
-               die("not an trace data file");
+               die("no trace data in the file");
 
        read_or_die(buf, 7);
        if (memcmp(buf, "tracing", 7) != 0)
-               die("not a trace file (missing tracing)");
+               die("not a trace file (missing 'tracing' tag)");
 
        version = read_string();
        if (show_version)
index f6637c2fa1fefede4fba8aeaec620c03add1d981..81698d5e65039b1c299e92f413dd5f2bf844af7c 100644 (file)
@@ -29,6 +29,8 @@ enum format_flags {
        FIELD_IS_SIGNED         = 4,
        FIELD_IS_STRING         = 8,
        FIELD_IS_DYNAMIC        = 16,
+       FIELD_IS_FLAG           = 32,
+       FIELD_IS_SYMBOLIC       = 64,
 };
 
 struct format_field {
@@ -243,12 +245,19 @@ extern int latency_format;
 
 int parse_header_page(char *buf, unsigned long size);
 int trace_parse_common_type(void *data);
+int trace_parse_common_pid(void *data);
+int parse_common_pc(void *data);
+int parse_common_flags(void *data);
+int parse_common_lock_depth(void *data);
 struct event *trace_find_event(int id);
+struct event *trace_find_next_event(struct event *event);
+unsigned long long read_size(void *ptr, int size);
 unsigned long long
 raw_field_value(struct event *event, const char *name, void *data);
 void *raw_field_ptr(struct event *event, const char *name, void *data);
+unsigned long long eval_flag(const char *flag);
 
-void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
+int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
 
 /* taken from kernel/trace/trace.h */
 enum trace_flag_type {
@@ -259,4 +268,18 @@ enum trace_flag_type {
        TRACE_FLAG_SOFTIRQ              = 0x10,
 };
 
+/*
+ * Callbacks a scripting language backend provides; registered under a spec
+ * name with script_spec_register().
+ */
+struct scripting_ops {
+       const char *name;       /* backend name */
+       /* NOTE(review): int results presumably mean 0 on success - confirm */
+       int (*start_script) (const char *);
+       int (*stop_script) (void);
+       /* called with one event record and its cpu, timestamp and comm */
+       void (*process_event) (int cpu, void *data, int size,
+                              unsigned long long nsecs, char *comm);
+       /* write a skeleton script; outfile is the name to generate for */
+       int (*generate_script) (const char *outfile);
+};
+
+int script_spec_register(const char *spec, struct scripting_ops *ops);
+
+extern struct scripting_ops perl_scripting_ops;
+void setup_perl_scripting(void);
+
 #endif /* __PERF_TRACE_EVENTS_H */
index f2203a0946bcbb04917b5473f9788ae62dbc149d..c673d8825883ce2bec91b03f5c1e474a756fde19 100644 (file)
@@ -84,6 +84,9 @@
 #include <iconv.h>
 #endif
 
+extern const char *graph_line;
+extern const char *graph_dotted_line;
+
 /* On most systems <limits.h> would have given us this, but
  * not on some systems (e.g. GNU/Hurd).
  */
@@ -287,17 +290,15 @@ static inline char *gitstrchrnul(const char *s, int c)
  * Wrappers:
  */
 extern char *xstrdup(const char *str);
-extern void *xmalloc(size_t size);
+extern void *xmalloc(size_t size) __attribute__((weak));
 extern void *xmemdupz(const void *data, size_t len);
 extern char *xstrndup(const char *str, size_t len);
-extern void *xrealloc(void *ptr, size_t size);
-extern void *xcalloc(size_t nmemb, size_t size);
-extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
-extern ssize_t xread(int fd, void *buf, size_t len);
-extern ssize_t xwrite(int fd, const void *buf, size_t len);
-extern int xdup(int fd);
-extern FILE *xfdopen(int fd, const char *mode);
-extern int xmkstemp(char *template);
+extern void *xrealloc(void *ptr, size_t size) __attribute__((weak));
+
+/*
+ * Allocate size bytes of zero-filled memory with calloc(); returns NULL on
+ * failure, exactly as calloc() does.
+ */
+static inline void *zalloc(size_t size)
+{
+       return calloc(1, size);
+}
 
 static inline size_t xsize_t(off_t len)
 {
index 4574ac28396f6779fcecacfafe570dc0a5e01dc6..bf44ca85d23be29334d69ef6718d9b613f06a9bb 100644 (file)
@@ -79,43 +79,12 @@ void *xrealloc(void *ptr, size_t size)
        return ret;
 }
 
-void *xcalloc(size_t nmemb, size_t size)
-{
-       void *ret = calloc(nmemb, size);
-       if (!ret && (!nmemb || !size))
-               ret = calloc(1, 1);
-       if (!ret) {
-               release_pack_memory(nmemb * size, -1);
-               ret = calloc(nmemb, size);
-               if (!ret && (!nmemb || !size))
-                       ret = calloc(1, 1);
-               if (!ret)
-                       die("Out of memory, calloc failed");
-       }
-       return ret;
-}
-
-void *xmmap(void *start, size_t length,
-       int prot, int flags, int fd, off_t offset)
-{
-       void *ret = mmap(start, length, prot, flags, fd, offset);
-       if (ret == MAP_FAILED) {
-               if (!length)
-                       return NULL;
-               release_pack_memory(length, fd);
-               ret = mmap(start, length, prot, flags, fd, offset);
-               if (ret == MAP_FAILED)
-                       die("Out of memory? mmap failed: %s", strerror(errno));
-       }
-       return ret;
-}
-
 /*
  * xread() is the same a read(), but it automatically restarts read()
  * operations with a recoverable error (EAGAIN and EINTR). xread()
  * DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
  */
-ssize_t xread(int fd, void *buf, size_t len)
+static ssize_t xread(int fd, void *buf, size_t len)
 {
        ssize_t nr;
        while (1) {
@@ -131,7 +100,7 @@ ssize_t xread(int fd, void *buf, size_t len)
  * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
  * GUARANTEE that "len" bytes is written even if the operation is successful.
  */
-ssize_t xwrite(int fd, const void *buf, size_t len)
+static ssize_t xwrite(int fd, const void *buf, size_t len)
 {
        ssize_t nr;
        while (1) {
@@ -179,29 +148,3 @@ ssize_t write_in_full(int fd, const void *buf, size_t count)
 
        return total;
 }
-
-int xdup(int fd)
-{
-       int ret = dup(fd);
-       if (ret < 0)
-               die("dup failed: %s", strerror(errno));
-       return ret;
-}
-
-FILE *xfdopen(int fd, const char *mode)
-{
-       FILE *stream = fdopen(fd, mode);
-       if (stream == NULL)
-               die("Out of memory? fdopen failed: %s", strerror(errno));
-       return stream;
-}
-
-int xmkstemp(char *template)
-{
-       int fd;
-
-       fd = mkstemp(template);
-       if (fd < 0)
-               die("Unable to create temporary file: %s", strerror(errno));
-       return fd;
-}