Merge commit 'linus/master' into tracing/kprobes

author Frederic Weisbecker <fweisbec@gmail.com>
Wed, 23 Sep 2009 21:08:43 +0000 (23:08 +0200)
committer Frederic Weisbecker <fweisbec@gmail.com>
Wed, 23 Sep 2009 21:08:43 +0000 (23:08 +0200)
diff --combined arch/x86/Makefile

index 5fe16bfd15ac949736623bef580953bb71f66a67,7983c420eaf2a304843b9b0563b4811626720c71..4aefc034e9a0a869593a88b7aaf2423f6aaf618b
--- 1/arch/x86/Makefile
--- 2/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@@ -32,8 -32,8 +32,8 @@@ ifeq ($(CONFIG_X86_32),y
   
           # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
           # a lot more stack due to the lack of sharing of stack slots:
-         KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
-                 echo $(call cc-option,-fno-unit-at-a-time); fi ;)
+         KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
+                               $(call cc-option,-fno-unit-at-a-time))
   
           # CPU-specific tuning. Anything which can be shared with UML should go here.
           include $(srctree)/arch/x86/Makefile_32.cpu
@@@ -55,6 -55,8 +55,8 @@@ els
   
           cflags-$(CONFIG_MCORE2) += \
                   $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
+       cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
+               $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
           cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
           KBUILD_CFLAGS += $(cflags-y)
   
@@@ -72,7 -74,7 +74,7 @@@ endi
   
   ifdef CONFIG_CC_STACKPROTECTOR
         cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
-         ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y)
+         ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y)
                   stackp-y := -fstack-protector
                   stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
                   KBUILD_CFLAGS += $(stackp-y)
@@@ -154,9 -156,6 +156,9 @@@ all: bzImag
   KBUILD_IMAGE := $(boot)/bzImage
   
   bzImage: vmlinux
+ +ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
+ +      $(Q)$(MAKE) $(build)=arch/x86/tools posttest
+ +endif
         $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
         $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
         $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
diff --combined arch/x86/kernel/entry_64.S

index 36e2ef5cc83fdba563e05c76bfe5d3fb0590ad64,b5c061f8f358489fee828e48b836d18c3bd6da79..42a0b2cbf2e12652c7ca0734565742a450793a14
--- 1/arch/x86/kernel/entry_64.S
--- 2/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@@ -146,7 -146,7 +146,7 @@@ ENTRY(ftrace_graph_caller
   END(ftrace_graph_caller)
   
   GLOBAL(return_to_handler)
-       subq  $80, %rsp
+       subq  $24, %rsp
   
         /* Save the return values */
         movq %rax, (%rsp)
@@@ -155,10 -155,10 +155,10 @@@
   
         call ftrace_return_to_handler
   
-       movq %rax, 72(%rsp)
+       movq %rax, 16(%rsp)
         movq 8(%rsp), %rdx
         movq (%rsp), %rax
-       addq $72, %rsp
+       addq $16, %rsp
         retq
   #endif
   
@@@ -536,20 -536,13 +536,13 @@@ sysret_signal
         bt $TIF_SYSCALL_AUDIT,%edx
         jc sysret_audit
   #endif
-       /* edx: work flags (arg3) */
-       leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
-       xorl %esi,%esi # oldset -> arg2
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call do_notify_resume
-       RESTORE_TOP_OF_STACK %r11
-       RESTORE_REST
-       movl $_TIF_WORK_MASK,%edi
-       /* Use IRET because user could have changed frame. This
-          works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
-       DISABLE_INTERRUPTS(CLBR_NONE)
-       TRACE_IRQS_OFF
-       jmp int_with_check
+       /*
+        * We have a signal, or exit tracing or single-step.
+        * These all wind up with the iret return path anyway,
+        * so just join that path right now.
+        */
+       FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+       jmp int_check_syscall_exit_work
   
   badsys:
         movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@@ -654,6 -647,7 +647,7 @@@ int_careful
   int_very_careful:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_NONE)
+ int_check_syscall_exit_work:
         SAVE_REST
         /* Check for syscall exit trace */
         testl $_TIF_WORK_SYSCALL_EXIT,%edx
@@@ -809,10 -803,6 +803,10 @@@ END(interrupt
         call \func
         .endm
   
+ +/*
+ + * Interrupt entry/exit should be protected against kprobes
+ + */
+ +      .pushsection .kprobes.text, "ax"
         /*
          * The interrupt stubs push (~vector+0x80) onto the stack and
          * then jump to common_interrupt.
@@@ -951,10 -941,6 +945,10 @@@ ENTRY(retint_kernel
   
         CFI_ENDPROC
   END(common_interrupt)
+ +/*
+ + * End of kprobes section
+ + */
+ +       .popsection
   
   /*
    * APIC interrupts.
@@@ -1029,7 -1015,7 +1023,7 @@@ apicinterrupt ERROR_APIC_VECTOR 
   apicinterrupt SPURIOUS_APIC_VECTOR \
         spurious_interrupt smp_spurious_interrupt
   
- #ifdef CONFIG_PERF_COUNTERS
+ #ifdef CONFIG_PERF_EVENTS
   apicinterrupt LOCAL_PENDING_VECTOR \
         perf_pending_interrupt smp_perf_pending_interrupt
   #endif
diff --combined arch/x86/kernel/ptrace.c

index caffb6809452f0a2da3de34fecf240c64dcb757e,7b058a2dc66afecdaeb58877102957dff77e7d81..c4f76d275ee4cda38bed14268a4ac3b6f04d7436
--- 1/arch/x86/kernel/ptrace.c
--- 2/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@@ -49,118 -49,6 +49,118 @@@ enum x86_regset 
         REGSET_IOPERM32,
   };
   
+ +struct pt_regs_offset {
+ +      const char *name;
+ +      int offset;
+ +};
+ +
+ +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+ +#define REG_OFFSET_END {.name = NULL, .offset = 0}
+ +
+ +static const struct pt_regs_offset regoffset_table[] = {
+ +#ifdef CONFIG_X86_64
+ +      REG_OFFSET_NAME(r15),
+ +      REG_OFFSET_NAME(r14),
+ +      REG_OFFSET_NAME(r13),
+ +      REG_OFFSET_NAME(r12),
+ +      REG_OFFSET_NAME(r11),
+ +      REG_OFFSET_NAME(r10),
+ +      REG_OFFSET_NAME(r9),
+ +      REG_OFFSET_NAME(r8),
+ +#endif
+ +      REG_OFFSET_NAME(bx),
+ +      REG_OFFSET_NAME(cx),
+ +      REG_OFFSET_NAME(dx),
+ +      REG_OFFSET_NAME(si),
+ +      REG_OFFSET_NAME(di),
+ +      REG_OFFSET_NAME(bp),
+ +      REG_OFFSET_NAME(ax),
+ +#ifdef CONFIG_X86_32
+ +      REG_OFFSET_NAME(ds),
+ +      REG_OFFSET_NAME(es),
+ +      REG_OFFSET_NAME(fs),
+ +      REG_OFFSET_NAME(gs),
+ +#endif
+ +      REG_OFFSET_NAME(orig_ax),
+ +      REG_OFFSET_NAME(ip),
+ +      REG_OFFSET_NAME(cs),
+ +      REG_OFFSET_NAME(flags),
+ +      REG_OFFSET_NAME(sp),
+ +      REG_OFFSET_NAME(ss),
+ +      REG_OFFSET_END,
+ +};
+ +
+ +/**
+ + * regs_query_register_offset() - query register offset from its name
+ + * @name:     the name of a register
+ + *
+ + * regs_query_register_offset() returns the offset of a register in struct
+ + * pt_regs from its name. If the name is invalid, this returns -EINVAL.
+ + */
+ +int regs_query_register_offset(const char *name)
+ +{
+ +      const struct pt_regs_offset *roff;
+ +      for (roff = regoffset_table; roff->name != NULL; roff++)
+ +              if (!strcmp(roff->name, name))
+ +                      return roff->offset;
+ +      return -EINVAL;
+ +}
+ +
+ +/**
+ + * regs_query_register_name() - query register name from its offset
+ + * @offset:   the offset of a register in struct pt_regs.
+ + *
+ + * regs_query_register_name() returns the name of a register from its
+ + * offset in struct pt_regs. If the @offset is invalid, this returns NULL.
+ + */
+ +const char *regs_query_register_name(unsigned int offset)
+ +{
+ +      const struct pt_regs_offset *roff;
+ +      for (roff = regoffset_table; roff->name != NULL; roff++)
+ +              if (roff->offset == offset)
+ +                      return roff->name;
+ +      return NULL;
+ +}
+ +
+ +static const int arg_offs_table[] = {
+ +#ifdef CONFIG_X86_32
+ +      [0] = offsetof(struct pt_regs, ax),
+ +      [1] = offsetof(struct pt_regs, dx),
+ +      [2] = offsetof(struct pt_regs, cx)
+ +#else /* CONFIG_X86_64 */
+ +      [0] = offsetof(struct pt_regs, di),
+ +      [1] = offsetof(struct pt_regs, si),
+ +      [2] = offsetof(struct pt_regs, dx),
+ +      [3] = offsetof(struct pt_regs, cx),
+ +      [4] = offsetof(struct pt_regs, r8),
+ +      [5] = offsetof(struct pt_regs, r9)
+ +#endif
+ +};
+ +
+ +/**
+ + * regs_get_argument_nth() - get Nth argument at function call
+ + * @regs:     pt_regs which contains registers at function entry.
+ + * @n:                argument number.
+ + *
+ + * regs_get_argument_nth() returns @n th argument of a function call.
+ + * Since usually the kernel stack will be changed right after function entry,
+ + * you must use this at function entry. If the @n th entry is NOT in the
+ + * kernel stack or pt_regs, this returns 0.
+ + */
+ +unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
+ +{
+ +      if (n < ARRAY_SIZE(arg_offs_table))
+ +              return *(unsigned long *)((char *)regs + arg_offs_table[n]);
+ +      else {
+ +              /*
+ +               * The typical case: arg n is on the stack.
+ +               * (Note: stack[0] = return address, so skip it)
+ +               */
+ +              n -= ARRAY_SIZE(arg_offs_table);
+ +              return regs_get_kernel_stack_nth(regs, 1 + n);
+ +      }
+ +}
+ +
   /*
    * does not yet catch signals sent when the child dies.
    * in exit.c or in signal.c.
@@@ -437,16 -325,6 +437,6 @@@ static int putreg(struct task_struct *c
                 return set_flags(child, value);
   
   #ifdef CONFIG_X86_64
-       /*
-        * Orig_ax is really just a flag with small positive and
-        * negative values, so make sure to always sign-extend it
-        * from 32 bits so that it works correctly regardless of
-        * whether we come from a 32-bit environment or not.
-        */
-       case offsetof(struct user_regs_struct, orig_ax):
-               value = (long) (s32) value;
-               break;
- 
         case offsetof(struct user_regs_struct,fs_base):
                 if (value >= TASK_SIZE_OF(child))
                         return -EIO;
@@@ -1238,10 -1116,15 +1228,15 @@@ static int putreg32(struct task_struct 
   
         case offsetof(struct user32, regs.orig_eax):
                 /*
-                * Sign-extend the value so that orig_eax = -1
-                * causes (long)orig_ax < 0 tests to fire correctly.
+                * A 32-bit debugger setting orig_eax means to restore
+                * the state of the task restarting a 32-bit syscall.
+                * Make sure we interpret the -ERESTART* codes correctly
+                * in case the task is not actually still sitting at the
+                * exit from a 32-bit syscall with TS_COMPAT still set.
                  */
-               regs->orig_ax = (long) (s32) value;
+               regs->orig_ax = value;
+               if (syscall_get_nr(child, regs) >= 0)
+                       task_thread_info(child)->status |= TS_COMPAT;
                 break;
   
         case offsetof(struct user32, regs.eflags):
diff --combined arch/x86/lib/Makefile

index c77f8a7c531dfd4819d7b5f778a70af6182b3777,9e609206fac9b668b906c5949a3b018f9804752c..965026472c717914e86e4c164c557bf825df4861
--- 1/arch/x86/lib/Makefile
--- 2/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@@ -2,26 -2,15 +2,28 @@@
   # Makefile for x86 specific library files.
   #
   
+ +inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
+ +inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
+ +quiet_cmd_inat_tables = GEN     $@
+ +      cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+ +
+ +$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+ +      $(call cmd,inat_tables)
+ +
+ +$(obj)/inat.o: $(obj)/inat-tables.c
+ +
+ +clean-files := inat-tables.c
+ +
   obj-$(CONFIG_SMP) := msr.o
   
   lib-y := delay.o
   lib-y += thunk_$(BITS).o
   lib-y += usercopy_$(BITS).o getuser.o putuser.o
   lib-y += memcpy_$(BITS).o
+ +lib-y += insn.o inat.o
   
+ obj-y += msr-reg.o msr-reg-export.o
+ 
   ifeq ($(CONFIG_X86_32),y)
           obj-y += atomic64_32.o
           lib-y += checksum_32.o
diff --combined arch/x86/mm/fault.c

index c322e59f2d107aa550b6a7577a4ab58caa9b2497,82728f2c6d5599ccda0c4cb1dee132ce3305f5ca..923ea3fb7037111b004a7d715d5108c1d97c2668
--- 1/arch/x86/mm/fault.c
--- 2/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@@ -10,7 -10,7 +10,7 @@@
   #include <linux/bootmem.h>            /* max_low_pfn                  */
   #include <linux/kprobes.h>            /* __kprobes, ...               */
   #include <linux/mmiotrace.h>          /* kmmio_handler, ...           */
- #include <linux/perf_counter.h>               /* perf_swcounter_event         */
+ #include <linux/perf_event.h>         /* perf_sw_event                */
   
   #include <asm/traps.h>                        /* dotraplinkage, ...           */
   #include <asm/pgalloc.h>              /* pgd_*(), ...                 */
@@@ -38,8 -38,7 +38,8 @@@ enum x86_pf_error_code 
    * Returns 0 if mmiotrace is disabled, or if the fault is not
    * handled by mmiotrace:
    */
- -static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+ +static inline int __kprobes
+ +kmmio_fault(struct pt_regs *regs, unsigned long addr)
   {
         if (unlikely(is_kmmio_active()))
                 if (kmmio_handler(regs, addr) == 1)
@@@ -47,7 -46,7 +47,7 @@@
         return 0;
   }
   
- -static inline int notify_page_fault(struct pt_regs *regs)
+ +static inline int __kprobes notify_page_fault(struct pt_regs *regs)
   {
         int ret = 0;
   
@@@ -240,7 -239,7 +240,7 @@@ void vmalloc_sync_all(void
    *
    *   Handle a fault on the vmalloc or module mapping area
    */
- -static noinline int vmalloc_fault(unsigned long address)
+ +static noinline __kprobes int vmalloc_fault(unsigned long address)
   {
         unsigned long pgd_paddr;
         pmd_t *pmd_k;
@@@ -286,26 -285,25 +286,25 @@@ check_v8086_mode(struct pt_regs *regs, 
                 tsk->thread.screen_bitmap |= 1 << bit;
   }
   
- static void dump_pagetable(unsigned long address)
+ static bool low_pfn(unsigned long pfn)
   {
-       __typeof__(pte_val(__pte(0))) page;
+       return pfn < max_low_pfn;
+ }
   
-       page = read_cr3();
-       page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+ static void dump_pagetable(unsigned long address)
+ {
+       pgd_t *base = __va(read_cr3());
+       pgd_t *pgd = &base[pgd_index(address)];
+       pmd_t *pmd;
+       pte_t *pte;
   
   #ifdef CONFIG_X86_PAE
-       printk("*pdpt = %016Lx ", page);
-       if ((page >> PAGE_SHIFT) < max_low_pfn
-           && page & _PAGE_PRESENT) {
-               page &= PAGE_MASK;
-               page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
-                                                       & (PTRS_PER_PMD - 1)];
-               printk(KERN_CONT "*pde = %016Lx ", page);
-               page &= ~_PAGE_NX;
-       }
- #else
-       printk("*pde = %08lx ", page);
+       printk("*pdpt = %016Lx ", pgd_val(*pgd));
+       if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+               goto out;
   #endif
+       pmd = pmd_offset(pud_offset(pgd, address), address);
+       printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
   
         /*
          * We must not directly access the pte in the highpte
@@@ -313,16 -311,12 +312,12 @@@
          * And let's rather not kmap-atomic the pte, just in case
          * it's allocated already:
          */
-       if ((page >> PAGE_SHIFT) < max_low_pfn
-           && (page & _PAGE_PRESENT)
-           && !(page & _PAGE_PSE)) {
- 
-               page &= PAGE_MASK;
-               page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
-                                                       & (PTRS_PER_PTE - 1)];
-               printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
-       }
+       if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+               goto out;
   
+       pte = pte_offset_kernel(pmd, address);
+       printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+ out:
         printk("\n");
   }
   
@@@ -362,7 -356,7 +357,7 @@@ void vmalloc_sync_all(void
    *
    * This assumes no large pages in there.
    */
- -static noinline int vmalloc_fault(unsigned long address)
+ +static noinline __kprobes int vmalloc_fault(unsigned long address)
   {
         pgd_t *pgd, *pgd_ref;
         pud_t *pud, *pud_ref;
@@@ -451,16 -445,12 +446,12 @@@ static int bad_address(void *p
   
   static void dump_pagetable(unsigned long address)
   {
-       pgd_t *pgd;
+       pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+       pgd_t *pgd = base + pgd_index(address);
         pud_t *pud;
         pmd_t *pmd;
         pte_t *pte;
   
-       pgd = (pgd_t *)read_cr3();
- 
-       pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
- 
-       pgd += pgd_index(address);
         if (bad_address(pgd))
                 goto bad;
   
@@@ -859,7 -849,7 +850,7 @@@ static int spurious_fault_check(unsigne
    * There are no security implications to leaving a stale TLB when
    * increasing the permissions on a page.
    */
- -static noinline int
+ +static noinline __kprobes int
   spurious_fault(unsigned long error_code, unsigned long address)
   {
         pgd_t *pgd;
@@@ -1027,7 -1017,7 +1018,7 @@@ do_page_fault(struct pt_regs *regs, uns
         if (unlikely(error_code & PF_RSVD))
                 pgtable_bad(regs, error_code, address);
   
-       perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
   
         /*
          * If we're in an interrupt, have no user context or are running
@@@ -1124,11 -1114,11 +1115,11 @@@ good_area
   
         if (fault & VM_FAULT_MAJOR) {
                 tsk->maj_flt++;
-               perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
                                      regs, address);
         } else {
                 tsk->min_flt++;
-               perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
                                      regs, address);
         }
   
diff --combined include/linux/ftrace_event.h

index a256c8f782906691e0512538a00533737e010ec4,4ec5e67e18cfda40ad34a52187a3ff288682f6be..3451c55acb5953a084f6e24b5cd37954b2175882
--- 1/include/linux/ftrace_event.h
--- 2/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@@ -1,9 -1,10 +1,10 @@@
   #ifndef _LINUX_FTRACE_EVENT_H
   #define _LINUX_FTRACE_EVENT_H
   
- #include <linux/trace_seq.h>
   #include <linux/ring_buffer.h>
+ #include <linux/trace_seq.h>
   #include <linux/percpu.h>
+ #include <linux/hardirq.h>
   
   struct trace_array;
   struct tracer;
@@@ -34,7 -35,7 +35,7 @@@ struct trace_entry 
         unsigned char           flags;
         unsigned char           preempt_count;
         int                     pid;
-       int                     tgid;
+       int                     lock_depth;
   };
   
   #define FTRACE_MAX_EVENT                                              \
@@@ -116,12 -117,12 +117,12 @@@ struct ftrace_event_call 
         struct dentry           *dir;
         struct trace_event      *event;
         int                     enabled;
- -      int                     (*regfunc)(void *);
- -      void                    (*unregfunc)(void *);
+ +      int                     (*regfunc)(struct ftrace_event_call *);
+ +      void                    (*unregfunc)(struct ftrace_event_call *);
         int                     id;
- -      int                     (*raw_init)(void);
- -      int                     (*show_format)(struct ftrace_event_call *call,
- -                                             struct trace_seq *s);
+ +      int                     (*raw_init)(struct ftrace_event_call *);
+ +      int                     (*show_format)(struct ftrace_event_call *,
+ +                                             struct trace_seq *);
         int                     (*define_fields)(struct ftrace_event_call *);
         struct list_head        fields;
         int                     filter_active;
@@@ -130,12 -131,17 +131,17 @@@
         void                    *data;
   
         atomic_t                profile_count;
- -      int                     (*profile_enable)(void);
- -      void                    (*profile_disable)(void);
+ +      int                     (*profile_enable)(struct ftrace_event_call *);
+ +      void                    (*profile_disable)(struct ftrace_event_call *);
   };
   
+ #define FTRACE_MAX_PROFILE_SIZE       2048
+ 
+ extern char                   *trace_profile_buf;
+ extern char                   *trace_profile_buf_nmi;
+ 
   #define MAX_FILTER_PRED               32
- #define MAX_FILTER_STR_VAL    128
+ #define MAX_FILTER_STR_VAL    256     /* Should handle KSYM_SYMBOL_LEN */
   
   extern void destroy_preds(struct ftrace_event_call *call);
   extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
@@@ -151,12 -157,11 +157,12 @@@ enum 
         FILTER_PTR_STRING,
   };
   
- -extern int trace_define_field(struct ftrace_event_call *call,
- -                            const char *type, const char *name,
- -                            int offset, int size, int is_signed,
- -                            int filter_type);
   extern int trace_define_common_fields(struct ftrace_event_call *call);
+ +extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+ +                            const char *name, int offset, int size,
+ +                            int is_signed, int filter_type);
+ +extern int trace_add_event_call(struct ftrace_event_call *call);
+ +extern void trace_remove_event_call(struct ftrace_event_call *call);
   
   #define is_signed_type(type)  (((type)(-1)) < 0)
   
diff --combined include/linux/kprobes.h

index 87eb79c9dd6074a3e4708c84393418beb7a9c39b,3a46b7b7abb219c40bf39ce4d5f4e448da131212..1b672f74a32f0d76d27c277c759c6c3a24135674
--- 1/include/linux/kprobes.h
--- 2/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@@ -48,13 -48,13 +48,13 @@@
   #define KPROBE_HIT_SSDONE     0x00000008
   
  /* Attach to any function that must not be probed; it is placed in .kprobes.text */
- #define __kprobes     __attribute__((__section__(".kprobes.text"))) notrace
+ #define __kprobes     __attribute__((__section__(".kprobes.text")))
   #else /* CONFIG_KPROBES */
   typedef int kprobe_opcode_t;
   struct arch_specific_insn {
         int dummy;
   };
- #define __kprobes     notrace
+ #define __kprobes
   #endif /* CONFIG_KPROBES */
   
   struct kprobe;
@@@ -296,8 -296,6 +296,8 @@@ void recycle_rp_inst(struct kretprobe_i
   int disable_kprobe(struct kprobe *kp);
   int enable_kprobe(struct kprobe *kp);
   
+ +void dump_kprobe(struct kprobe *kp);
+ +
   #else /* !CONFIG_KPROBES: */
   
   static inline int kprobes_built_in(void)
diff --combined include/linux/syscalls.h

index 317d913a1488464e191645667892793746a1517d,a990ace1a8380f01901b742a6b0821aff46a5d9d..b50974a93af0b83d2c1e3a826f4bdc6a8848ba62
--- 1/include/linux/syscalls.h
--- 2/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@@ -55,7 -55,7 +55,7 @@@ struct compat_timeval
   struct robust_list_head;
   struct getcpu_cache;
   struct old_linux_dirent;
- struct perf_counter_attr;
+ struct perf_event_attr;
   
   #include <linux/types.h>
   #include <linux/aio_abi.h>
@@@ -100,33 -100,25 +100,25 @@@
   
   #ifdef CONFIG_EVENT_PROFILE
   #define TRACE_SYS_ENTER_PROFILE(sname)                                               \
- static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call)  \
- -static int prof_sysenter_enable_##sname(void)                                \
++static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused)      \
   {                                                                            \
-       int ret = 0;                                                           \
-       if (!atomic_inc_return(&event_enter_##sname.profile_count))            \
-               ret = reg_prof_syscall_enter("sys"#sname);                     \
-       return ret;                                                            \
+       return reg_prof_syscall_enter("sys"#sname);                            \
   }                                                                            \
                                                                                \
- static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\
- -static void prof_sysenter_disable_##sname(void)                                      \
++static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused)    \
   {                                                                            \
-       if (atomic_add_negative(-1, &event_enter_##sname.profile_count))       \
-               unreg_prof_syscall_enter("sys"#sname);                         \
+       unreg_prof_syscall_enter("sys"#sname);                                 \
   }
   
   #define TRACE_SYS_EXIT_PROFILE(sname)                                        \
- static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call)   \
- -static int prof_sysexit_enable_##sname(void)                                 \
++static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused)       \
   {                                                                            \
-       int ret = 0;                                                           \
-       if (!atomic_inc_return(&event_exit_##sname.profile_count))             \
-               ret = reg_prof_syscall_exit("sys"#sname);                      \
-       return ret;                                                            \
+       return reg_prof_syscall_exit("sys"#sname);                             \
   }                                                                            \
                                                                                \
- static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
- -static void prof_sysexit_disable_##sname(void)                                       \
++static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
   {                                                                              \
-       if (atomic_add_negative(-1, &event_exit_##sname.profile_count))        \
-               unreg_prof_syscall_exit("sys"#sname);                          \
+       unreg_prof_syscall_exit("sys"#sname);                                  \
   }
   
   #define TRACE_SYS_ENTER_PROFILE_INIT(sname)                                  \
@@@ -165,7 -157,7 +157,7 @@@
         struct trace_event enter_syscall_print_##sname = {              \
                 .trace                  = print_syscall_enter,          \
         };                                                              \
- -      static int init_enter_##sname(void)                             \
+ +      static int init_enter_##sname(struct ftrace_event_call *call)   \
         {                                                               \
                 int num, id;                                            \
                 num = syscall_name_to_nr("sys"#sname);                  \
@@@ -201,7 -193,7 +193,7 @@@
         struct trace_event exit_syscall_print_##sname = {               \
                 .trace                  = print_syscall_exit,           \
         };                                                              \
- -      static int init_exit_##sname(void)                              \
+ +      static int init_exit_##sname(struct ftrace_event_call *call)    \
         {                                                               \
                 int num, id;                                            \
                 num = syscall_name_to_nr("sys"#sname);                  \
@@@ -468,8 -460,7 +460,7 @@@ asmlinkage long sys_mount(char __user *
                                 void __user *data);
   asmlinkage long sys_umount(char __user *name, int flags);
   asmlinkage long sys_oldumount(char __user *name);
- asmlinkage long sys_truncate(const char __user *path,
-                               unsigned long length);
+ asmlinkage long sys_truncate(const char __user *path, long length);
   asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
   asmlinkage long sys_stat(char __user *filename,
                         struct __old_kernel_stat __user *statbuf);
@@@ -885,7 -876,7 +876,7 @@@ asmlinkage long sys_ppoll(struct pollf
   int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
   
   
- asmlinkage long sys_perf_counter_open(
-               struct perf_counter_attr __user *attr_uptr,
+ asmlinkage long sys_perf_event_open(
+               struct perf_event_attr __user *attr_uptr,
                 pid_t pid, int cpu, int group_fd, unsigned long flags);
   #endif
diff --combined include/trace/ftrace.h

index 5d3df2a5049d2d4cced1bdb530df2ef979b79e8f,cc0d9667e182d14d9d84ffc47d9e21078d96d067..54d02c06ae7e94ccfec800dc7472fcef84664c9e
--- 1/include/trace/ftrace.h
--- 2/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@@ -239,9 -239,9 +239,9 @@@ ftrace_format_##call(struct ftrace_even
   #undef __print_flags
   #define __print_flags(flag, delim, flag_array...)                     \
         ({                                                              \
-               static const struct trace_print_flags flags[] =         \
+               static const struct trace_print_flags __flags[] =       \
                         { flag_array, { -1, NULL }};                    \
-               ftrace_print_flags_seq(p, delim, flag, flags);          \
+               ftrace_print_flags_seq(p, delim, flag, __flags);        \
         })
   
   #undef __print_symbolic
@@@ -254,7 -254,7 +254,7 @@@
   
   #undef TRACE_EVENT
   #define TRACE_EVENT(call, proto, args, tstruct, assign, print)                \
- enum print_line_t                                                     \
+ static enum print_line_t                                              \
   ftrace_raw_output_##call(struct trace_iterator *iter, int flags)      \
   {                                                                     \
         struct trace_seq *s = &iter->seq;                               \
@@@ -317,7 -317,7 +317,7 @@@
   
   #undef TRACE_EVENT
   #define TRACE_EVENT(call, proto, args, tstruct, func, print)          \
- int                                                                   \
+ static int                                                            \
   ftrace_define_fields_##call(struct ftrace_event_call *event_call)     \
   {                                                                     \
         struct ftrace_raw_##call field;                                 \
@@@ -378,24 -378,18 +378,18 @@@ static inline int ftrace_get_offsets_##
   #ifdef CONFIG_EVENT_PROFILE
   
   /*
-  * Generate the functions needed for tracepoint perf_counter support.
+  * Generate the functions needed for tracepoint perf_event support.
    *
    * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
    *
-  * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
+  * static int ftrace_profile_enable_<call>(void)
    * {
-  *    int ret = 0;
-  *
-  *    if (!atomic_inc_return(&event_call->profile_count))
-  *            ret = register_trace_<call>(ftrace_profile_<call>);
-  *
-  *    return ret;
+  *    return register_trace_<call>(ftrace_profile_<call>);
    * }
    *
-  * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
+  * static void ftrace_profile_disable_<call>(void)
    * {
-  *    if (atomic_add_negative(-1, &event->call->profile_count))
-  *            unregister_trace_<call>(ftrace_profile_<call>);
+  *    unregister_trace_<call>(ftrace_profile_<call>);
    * }
    *
    */
@@@ -405,20 -399,14 +399,14 @@@
                                                                         \
   static void ftrace_profile_##call(proto);                             \
                                                                         \
- static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
- -static int ftrace_profile_enable_##call(void)                         \
++static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\
   {                                                                     \
-       int ret = 0;                                                    \
-                                                                       \
-       if (!atomic_inc_return(&event_call->profile_count))             \
-               ret = register_trace_##call(ftrace_profile_##call);     \
-                                                                       \
-       return ret;                                                     \
+       return register_trace_##call(ftrace_profile_##call);            \
   }                                                                     \
                                                                         \
- static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
- -static void ftrace_profile_disable_##call(void)                               \
++static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
   {                                                                     \
-       if (atomic_add_negative(-1, &event_call->profile_count))        \
-               unregister_trace_##call(ftrace_profile_##call);         \
+       unregister_trace_##call(ftrace_profile_##call);                 \
   }
   
   #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
@@@ -435,7 -423,7 +423,7 @@@
    *    event_trace_printk(_RET_IP_, "<call>: " <fmt>);
    * }
    *
- - * static int ftrace_reg_event_<call>(void)
+ + * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused)
    * {
    *    int ret;
    *
@@@ -446,7 -434,7 +434,7 @@@
    *    return ret;
    * }
    *
- - * static void ftrace_unreg_event_<call>(void)
+ + * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
    * {
    *    unregister_trace_<call>(ftrace_event_<call>);
    * }
@@@ -481,7 -469,7 +469,7 @@@
    *    trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc);
    * }
    *
- - * static int ftrace_raw_reg_event_<call>(void)
+ + * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
    * {
    *    int ret;
    *
@@@ -492,7 -480,7 +480,7 @@@
    *    return ret;
    * }
    *
- - * static void ftrace_unreg_event_<call>(void)
+ + * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
    * {
    *    unregister_trace_<call>(ftrace_raw_event_<call>);
    * }
@@@ -501,7 -489,7 +489,7 @@@
    *    .trace                  = ftrace_raw_output_<call>, <-- stage 2
    * };
    *
- - * static int ftrace_raw_init_event_<call>(void)
+ + * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
    * {
    *    int id;
    *
@@@ -598,7 -586,7 +586,7 @@@ static void ftrace_raw_event_##call(pro
                                                   event, irq_flags, pc); \
   }                                                                     \
                                                                         \
- -static int ftrace_raw_reg_event_##call(void *ptr)                     \
+ +static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
   {                                                                     \
         int ret;                                                        \
                                                                         \
@@@ -609,7 -597,7 +597,7 @@@
         return ret;                                                     \
   }                                                                     \
                                                                         \
- -static void ftrace_raw_unreg_event_##call(void *ptr)                  \
+ +static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
   {                                                                     \
         unregister_trace_##call(ftrace_raw_event_##call);               \
   }                                                                     \
@@@ -618,7 -606,7 +606,7 @@@ static struct trace_event ftrace_event_
         .trace                  = ftrace_raw_output_##call,             \
   };                                                                    \
                                                                         \
- -static int ftrace_raw_init_event_##call(void)                         \
+ +static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
   {                                                                     \
         int id;                                                         \
                                                                         \
@@@ -656,15 -644,16 +644,16 @@@ __attribute__((section("_ftrace_events"
    * {
    *    struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
    *    struct ftrace_event_call *event_call = &event_<call>;
-  *    extern void perf_tpcounter_event(int, u64, u64, void *, int);
+  *    extern void perf_tp_event(int, u64, u64, void *, int);
    *    struct ftrace_raw_##call *entry;
    *    u64 __addr = 0, __count = 1;
    *    unsigned long irq_flags;
+  *    struct trace_entry *ent;
    *    int __entry_size;
    *    int __data_size;
+  *    int __cpu;
    *    int pc;
    *
-  *    local_save_flags(irq_flags);
    *    pc = preempt_count();
    *
    *    __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
@@@ -675,25 -664,34 +664,34 @@@
    *                         sizeof(u64));
    *    __entry_size -= sizeof(u32);
    *
-  *    do {
-  *            char raw_data[__entry_size]; <- allocate our sample in the stack
-  *            struct trace_entry *ent;
+  *    // Protect the non nmi buffer
+  *    // This also protects the rcu read side
+  *    local_irq_save(irq_flags);
+  *    __cpu = smp_processor_id();
+  *
+  *    if (in_nmi())
+  *            raw_data = rcu_dereference(trace_profile_buf_nmi);
+  *    else
+  *            raw_data = rcu_dereference(trace_profile_buf);
+  *
+  *    if (!raw_data)
+  *            goto end;
    *
-  *            zero dead bytes from alignment to avoid stack leak to userspace:
+  *    raw_data = per_cpu_ptr(raw_data, __cpu);
    *
-  *            *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
-  *            entry = (struct ftrace_raw_<call> *)raw_data;
-  *            ent = &entry->ent;
-  *            tracing_generic_entry_update(ent, irq_flags, pc);
-  *            ent->type = event_call->id;
+  *    //zero dead bytes from alignment to avoid stack leak to userspace:
+  *    *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
+  *    entry = (struct ftrace_raw_<call> *)raw_data;
+  *    ent = &entry->ent;
+  *    tracing_generic_entry_update(ent, irq_flags, pc);
+  *    ent->type = event_call->id;
    *
-  *            <tstruct> <- do some jobs with dynamic arrays
+  *    <tstruct> <- do some jobs with dynamic arrays
    *
-  *            <assign>  <- affect our values
+  *    <assign>  <- affect our values
    *
-  *            perf_tpcounter_event(event_call->id, __addr, __count, entry,
-  *                         __entry_size);  <- submit them to perf counter
-  *    } while (0);
+  *    perf_tp_event(event_call->id, __addr, __count, entry,
+  *                 __entry_size);  <- submit them to perf counter
    *
    * }
    */
@@@ -712,15 -710,17 +710,17 @@@ static void ftrace_profile_##call(proto
   {                                                                     \
         struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
         struct ftrace_event_call *event_call = &event_##call;           \
-       extern void perf_tpcounter_event(int, u64, u64, void *, int);   \
+       extern void perf_tp_event(int, u64, u64, void *, int);  \
         struct ftrace_raw_##call *entry;                                \
         u64 __addr = 0, __count = 1;                                    \
         unsigned long irq_flags;                                        \
+       struct trace_entry *ent;                                        \
         int __entry_size;                                               \
         int __data_size;                                                \
+       char *raw_data;                                                 \
+       int __cpu;                                                      \
         int pc;                                                         \
                                                                         \
-       local_save_flags(irq_flags);                                    \
         pc = preempt_count();                                           \
                                                                         \
         __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
@@@ -728,23 -728,38 +728,38 @@@
                              sizeof(u64));                              \
         __entry_size -= sizeof(u32);                                    \
                                                                         \
-       do {                                                            \
-               char raw_data[__entry_size];                            \
-               struct trace_entry *ent;                                \
+       if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE,           \
+                     "profile buffer not large enough"))               \
+               return;                                                 \
+                                                                       \
+       local_irq_save(irq_flags);                                      \
+       __cpu = smp_processor_id();                                     \
                                                                         \
-               *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
-               entry = (struct ftrace_raw_##call *)raw_data;           \
-               ent = &entry->ent;                                      \
-               tracing_generic_entry_update(ent, irq_flags, pc);       \
-               ent->type = event_call->id;                             \
+       if (in_nmi())                                                   \
+               raw_data = rcu_dereference(trace_profile_buf_nmi);              \
+       else                                                            \
+               raw_data = rcu_dereference(trace_profile_buf);          \
                                                                         \
-               tstruct                                                 \
+       if (!raw_data)                                                  \
+               goto end;                                               \
                                                                         \
-               { assign; }                                             \
+       raw_data = per_cpu_ptr(raw_data, __cpu);                        \
                                                                         \
-               perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+       *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;         \
+       entry = (struct ftrace_raw_##call *)raw_data;                   \
+       ent = &entry->ent;                                              \
+       tracing_generic_entry_update(ent, irq_flags, pc);               \
+       ent->type = event_call->id;                                     \
+                                                                       \
+       tstruct                                                         \
+                                                                       \
+       { assign; }                                                     \
+                                                                       \
+       perf_tp_event(event_call->id, __addr, __count, entry,           \
                              __entry_size);                             \
-       } while (0);                                                    \
+                                                                       \
+ end:                                                                  \
+       local_irq_restore(irq_flags);                                   \
                                                                         \
   }
   
diff --combined kernel/kprobes.c

index b946761f84bd33d90eabab1596e635f43f0cb113,cfadc1291d0badb247275feacb720fa3408b9c44..b466afa4e1481e394ef493752cc3e9badf3bf25d
--- 1/kernel/kprobes.c
--- 2/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@@ -90,9 -90,6 +90,9 @@@ static spinlock_t *kretprobe_table_lock
    */
   static struct kprobe_blackpoint kprobe_blacklist[] = {
         {"preempt_schedule",},
+ +      {"native_get_debugreg",},
+ +      {"irq_entries_start",},
+ +      {"common_interrupt",},
         {NULL}    /* Terminator */
   };
   
@@@ -676,40 -673,6 +676,40 @@@ static kprobe_opcode_t __kprobes *kprob
         return (kprobe_opcode_t *)(((char *)addr) + p->offset);
   }
   
+ +/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+ +static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+ +{
+ +      struct kprobe *old_p, *list_p;
+ +
+ +      old_p = get_kprobe(p->addr);
+ +      if (unlikely(!old_p))
+ +              return NULL;
+ +
+ +      if (p != old_p) {
+ +              list_for_each_entry_rcu(list_p, &old_p->list, list)
+ +                      if (list_p == p)
+ +                      /* kprobe p is a valid probe */
+ +                              goto valid;
+ +              return NULL;
+ +      }
+ +valid:
+ +      return old_p;
+ +}
+ +
+ +/* Return error if the kprobe is being re-registered */
+ +static inline int check_kprobe_rereg(struct kprobe *p)
+ +{
+ +      int ret = 0;
+ +      struct kprobe *old_p;
+ +
+ +      mutex_lock(&kprobe_mutex);
+ +      old_p = __get_valid_kprobe(p);
+ +      if (old_p)
+ +              ret = -EINVAL;
+ +      mutex_unlock(&kprobe_mutex);
+ +      return ret;
+ +}
+ +
   int __kprobes register_kprobe(struct kprobe *p)
   {
         int ret = 0;
@@@ -722,10 -685,6 +722,10 @@@
                 return -EINVAL;
         p->addr = addr;
   
+ +      ret = check_kprobe_rereg(p);
+ +      if (ret)
+ +              return ret;
+ +
         preempt_disable();
         if (!kernel_text_address((unsigned long) p->addr) ||
             in_kprobes_functions((unsigned long) p->addr)) {
@@@ -795,6 -754,26 +795,6 @@@ out
   }
   EXPORT_SYMBOL_GPL(register_kprobe);
   
- -/* Check passed kprobe is valid and return kprobe in kprobe_table. */
- -static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
- -{
- -      struct kprobe *old_p, *list_p;
- -
- -      old_p = get_kprobe(p->addr);
- -      if (unlikely(!old_p))
- -              return NULL;
- -
- -      if (p != old_p) {
- -              list_for_each_entry_rcu(list_p, &old_p->list, list)
- -                      if (list_p == p)
- -                      /* kprobe p is a valid probe */
- -                              goto valid;
- -              return NULL;
- -      }
- -valid:
- -      return old_p;
- -}
- -
   /*
    * Unregister a kprobe without a scheduler synchronization.
    */
@@@ -1162,13 -1141,6 +1162,13 @@@ static void __kprobes kill_kprobe(struc
         arch_remove_kprobe(p);
   }
   
+ +void __kprobes dump_kprobe(struct kprobe *kp)
+ +{
+ +      printk(KERN_WARNING "Dumping kprobe:\n");
+ +      printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
+ +             kp->symbol_name, kp->addr, kp->offset);
+ +}
+ +
   /* Module notifier call back, checking kprobes on the module */
   static int __kprobes kprobes_module_callback(struct notifier_block *nb,
                                              unsigned long val, void *data)
@@@ -1349,7 -1321,7 +1349,7 @@@ static int __kprobes show_kprobe_addr(s
         return 0;
   }
   
- static struct seq_operations kprobes_seq_ops = {
+ static const struct seq_operations kprobes_seq_ops = {
         .start = kprobe_seq_start,
         .next  = kprobe_seq_next,
         .stop  = kprobe_seq_stop,
diff --combined kernel/trace/Kconfig

index e78dcbde1a81fbdebee0896d9f640fc2cabf5748,b416512ad17ff77eea13b6b391d907f32f76143b..15372a9f23999c80c12cb118b0a2b1f50f73763a
--- 1/kernel/trace/Kconfig
--- 2/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@@ -11,12 -11,18 +11,18 @@@ config NOP_TRACE
   
   config HAVE_FTRACE_NMI_ENTER
         bool
+       help
+         See Documentation/trace/ftrace-implementation.txt
   
   config HAVE_FUNCTION_TRACER
         bool
+       help
+         See Documentation/trace/ftrace-implementation.txt
   
   config HAVE_FUNCTION_GRAPH_TRACER
         bool
+       help
+         See Documentation/trace/ftrace-implementation.txt
   
   config HAVE_FUNCTION_GRAPH_FP_TEST
         bool
@@@ -28,21 -34,25 +34,25 @@@
   config HAVE_FUNCTION_TRACE_MCOUNT_TEST
         bool
         help
-        This gets selected when the arch tests the function_trace_stop
-        variable at the mcount call site. Otherwise, this variable
-        is tested by the called function.
+         See Documentation/trace/ftrace-implementation.txt
   
   config HAVE_DYNAMIC_FTRACE
         bool
+       help
+         See Documentation/trace/ftrace-implementation.txt
   
   config HAVE_FTRACE_MCOUNT_RECORD
         bool
+       help
+         See Documentation/trace/ftrace-implementation.txt
   
   config HAVE_HW_BRANCH_TRACER
         bool
   
   config HAVE_SYSCALL_TRACEPOINTS
         bool
+       help
+         See Documentation/trace/ftrace-implementation.txt
   
   config TRACER_MAX_TRACE
         bool
@@@ -73,7 -83,7 +83,7 @@@ config RING_BUFFER_ALLOW_SWA
   # This allows those options to appear when no other tracer is selected. But the
   # options do not appear when something else selects it. We need the two options
   # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
- # hidding of the automatic options options.
+ # hiding of the automatic options.
   
   config TRACING
         bool
@@@ -418,18 -428,6 +428,18 @@@ config BLK_DEV_IO_TRAC
   
           If unsure, say N.
   
+ +config KPROBE_TRACER
+ +      depends on KPROBES
+ +      depends on X86
+ +      bool "Trace kprobes"
+ +      select TRACING
+ +      select GENERIC_TRACER
+ +      help
+ +        This tracer probes everywhere that kprobes can probe, and
+ +        records various registers and memory locations specified by the
+ +        user. It also allows you to trace kprobe probe points as
+ +        dynamically defined events, with a per-probe event filter interface.
+ +
   config DYNAMIC_FTRACE
         bool "enable/disable ftrace tracepoints dynamically"
         depends on FUNCTION_TRACER
@@@ -481,6 -479,18 +491,18 @@@ config FTRACE_STARTUP_TES
           functioning properly. It will do tests on all the configured
           tracers of ftrace.
   
+ config EVENT_TRACE_TEST_SYSCALLS
+       bool "Run selftest on syscall events"
+       depends on FTRACE_STARTUP_TEST
+       help
+        This option will also enable testing every syscall event.
+        It only enables the event and disables it and runs various loads
+        with the event enabled. This adds a bit more time for kernel boot
+        up since it runs this on every system call defined.
+ 
+        TBD - enable a way to actually call the syscalls as we test their
+              events
+ 
   config MMIOTRACE
         bool "Memory mapped IO tracing"
         depends on HAVE_MMIOTRACE_SUPPORT && PCI
diff --combined kernel/trace/Makefile

index 7c00a1ec1496dd56296c1c3a17ae6bccdfc853a2,26f03ac07c2bc2164ce809cea5a48fce15d09ff1..c8cb75d7f280188538f0783bd1c1547fd6dec579
--- 1/kernel/trace/Makefile
--- 2/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@@ -42,7 -42,6 +42,6 @@@ obj-$(CONFIG_BOOT_TRACER) += trace_boot
   obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
   obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
   obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
- obj-$(CONFIG_POWER_TRACER) += trace_power.o
   obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
   obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
   obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
@@@ -54,6 -53,6 +53,7 @@@ obj-$(CONFIG_EVENT_TRACING) += trace_ex
   obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
   obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
   obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+ +obj-$(CONFIG_KPROBE_TRACER) += trace_kprobe.o
+ obj-$(CONFIG_EVENT_TRACING) += power-traces.o
   
   libftrace-y := ftrace.o
diff --combined kernel/trace/trace.h

index 821064914c8052d1658a76e46c0f6916165b87cd,405cb850b75d9a308d04d198946e9b0e8da21a39..104c1a72418fe65379b4a1e8de96ab3bc53b7ab6
--- 1/kernel/trace/trace.h
--- 2/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@@ -7,10 -7,10 +7,10 @@@
   #include <linux/clocksource.h>
   #include <linux/ring_buffer.h>
   #include <linux/mmiotrace.h>
+ #include <linux/tracepoint.h>
   #include <linux/ftrace.h>
   #include <trace/boot.h>
   #include <linux/kmemtrace.h>
- #include <trace/power.h>
   
   #include <linux/trace_seq.h>
   #include <linux/ftrace_event.h>
@@@ -36,163 -36,59 +36,59 @@@ enum trace_type 
         TRACE_HW_BRANCHES,
         TRACE_KMEM_ALLOC,
         TRACE_KMEM_FREE,
-       TRACE_POWER,
         TRACE_BLK,
   
         __TRACE_LAST_TYPE,
   };
   
- /*
-  * Function trace entry - function address and parent function addres:
-  */
- struct ftrace_entry {
-       struct trace_entry      ent;
-       unsigned long           ip;
-       unsigned long           parent_ip;
- };
- 
- /* Function call entry */
- struct ftrace_graph_ent_entry {
-       struct trace_entry              ent;
-       struct ftrace_graph_ent         graph_ent;
+ enum kmemtrace_type_id {
+       KMEMTRACE_TYPE_KMALLOC = 0,     /* kmalloc() or kfree(). */
+       KMEMTRACE_TYPE_CACHE,           /* kmem_cache_*(). */
+       KMEMTRACE_TYPE_PAGES,           /* __get_free_pages() and friends. */
   };
   
- /* Function return entry */
- struct ftrace_graph_ret_entry {
-       struct trace_entry              ent;
-       struct ftrace_graph_ret         ret;
- };
   extern struct tracer boot_tracer;
   
- /*
-  * Context switch trace entry - which task (and prio) we switched from/to:
-  */
- struct ctx_switch_entry {
-       struct trace_entry      ent;
-       unsigned int            prev_pid;
-       unsigned char           prev_prio;
-       unsigned char           prev_state;
-       unsigned int            next_pid;
-       unsigned char           next_prio;
-       unsigned char           next_state;
-       unsigned int            next_cpu;
- };
- 
- /*
-  * Special (free-form) trace entry:
-  */
- struct special_entry {
-       struct trace_entry      ent;
-       unsigned long           arg1;
-       unsigned long           arg2;
-       unsigned long           arg3;
- };
- 
- /*
-  * Stack-trace entry:
-  */
- 
- #define FTRACE_STACK_ENTRIES  8
+ #undef __field
+ #define __field(type, item)           type    item;
   
- struct stack_entry {
-       struct trace_entry      ent;
-       unsigned long           caller[FTRACE_STACK_ENTRIES];
- };
+ #undef __field_struct
+ #define __field_struct(type, item)    __field(type, item)
   
- struct userstack_entry {
-       struct trace_entry      ent;
-       unsigned long           caller[FTRACE_STACK_ENTRIES];
- };
+ #undef __field_desc
+ #define __field_desc(type, container, item)
   
- /*
-  * trace_printk entry:
-  */
- struct bprint_entry {
-       struct trace_entry      ent;
-       unsigned long           ip;
-       const char              *fmt;
-       u32                     buf[];
- };
+ #undef __array
+ #define __array(type, item, size)     type    item[size];
   
- struct print_entry {
-       struct trace_entry      ent;
-       unsigned long           ip;
-       char                    buf[];
- };
- 
- #define TRACE_OLD_SIZE                88
- 
- struct trace_field_cont {
-       unsigned char           type;
-       /* Temporary till we get rid of this completely */
-       char                    buf[TRACE_OLD_SIZE - 1];
- };
+ #undef __array_desc
+ #define __array_desc(type, container, item, size)
   
- struct trace_mmiotrace_rw {
-       struct trace_entry      ent;
-       struct mmiotrace_rw     rw;
- };
+ #undef __dynamic_array
+ #define __dynamic_array(type, item)   type    item[];
   
- struct trace_mmiotrace_map {
-       struct trace_entry      ent;
-       struct mmiotrace_map    map;
- };
- 
- struct trace_boot_call {
-       struct trace_entry      ent;
-       struct boot_trace_call boot_call;
- };
+ #undef F_STRUCT
+ #define F_STRUCT(args...)             args
   
- struct trace_boot_ret {
-       struct trace_entry      ent;
-       struct boot_trace_ret boot_ret;
- };
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print)   \
+       struct struct_name {                                    \
+               struct trace_entry      ent;                    \
+               tstruct                                         \
+       }
   
- #define TRACE_FUNC_SIZE 30
- #define TRACE_FILE_SIZE 20
- struct trace_branch {
-       struct trace_entry      ent;
-       unsigned                line;
-       char                    func[TRACE_FUNC_SIZE+1];
-       char                    file[TRACE_FILE_SIZE+1];
-       char                    correct;
- };
+ #undef TP_ARGS
+ #define TP_ARGS(args...)      args
   
- struct hw_branch_entry {
-       struct trace_entry      ent;
-       u64                     from;
-       u64                     to;
- };
+ #undef FTRACE_ENTRY_DUP
+ #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
   
- struct trace_power {
-       struct trace_entry      ent;
-       struct power_trace      state_data;
- };
- 
- enum kmemtrace_type_id {
-       KMEMTRACE_TYPE_KMALLOC = 0,     /* kmalloc() or kfree(). */
-       KMEMTRACE_TYPE_CACHE,           /* kmem_cache_*(). */
-       KMEMTRACE_TYPE_PAGES,           /* __get_free_pages() and friends. */
- };
- 
- struct kmemtrace_alloc_entry {
-       struct trace_entry      ent;
-       enum kmemtrace_type_id type_id;
-       unsigned long call_site;
-       const void *ptr;
-       size_t bytes_req;
-       size_t bytes_alloc;
-       gfp_t gfp_flags;
-       int node;
- };
- 
- struct kmemtrace_free_entry {
-       struct trace_entry      ent;
-       enum kmemtrace_type_id type_id;
-       unsigned long call_site;
-       const void *ptr;
- };
+ #include "trace_entries.h"
   
+ /*
+  * syscalls are special, and need special handling, this is why
+  * they are not included in trace_entries.h
+  */
   struct syscall_trace_enter {
         struct trace_entry      ent;
         int                     nr;
@@@ -205,37 -101,12 +101,35 @@@ struct syscall_trace_exit 
         unsigned long           ret;
   };
   
- 
- 
+ +struct kprobe_trace_entry {
+ +      struct trace_entry      ent;
+ +      unsigned long           ip;
+ +      int                     nargs;
+ +      unsigned long           args[];
+ +};
+ +
+ +#define SIZEOF_KPROBE_TRACE_ENTRY(n)                  \
+ +      (offsetof(struct kprobe_trace_entry, args) +    \
+ +      (sizeof(unsigned long) * (n)))
+ +
+ +struct kretprobe_trace_entry {
+ +      struct trace_entry      ent;
+ +      unsigned long           func;
+ +      unsigned long           ret_ip;
+ +      int                     nargs;
+ +      unsigned long           args[];
+ +};
+ +
+ +#define SIZEOF_KRETPROBE_TRACE_ENTRY(n)                       \
+ +      (offsetof(struct kretprobe_trace_entry, args) + \
+ +      (sizeof(unsigned long) * (n)))
+ +
   /*
    * trace_flag_type is an enumeration that holds different
    * states when a trace occurs. These are:
    *  IRQS_OFF          - interrupts were disabled
    *  IRQS_NOSUPPORT    - arch does not support irqs_disabled_flags
-  *  NEED_RESCED               - reschedule is requested
+  *  NEED_RESCHED      - reschedule is requested
    *  HARDIRQ           - inside an interrupt handler
    *  SOFTIRQ           - inside a softirq handler
    */
@@@ -334,7 -205,6 +228,6 @@@ extern void __ftrace_bad_type(void)
                 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,      \
                           TRACE_GRAPH_RET);             \
                 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
-               IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
                 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,       \
                           TRACE_KMEM_ALLOC);    \
                 IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
@@@ -414,7 -284,6 +307,6 @@@ struct tracer 
         struct tracer           *next;
         int                     print_max;
         struct tracer_flags     *flags;
-       struct tracer_stat      *stats;
   };
   
   
@@@ -493,6 -362,7 +385,7 @@@ void tracing_stop_sched_switch_record(v
   void tracing_start_sched_switch_record(void);
   int register_tracer(struct tracer *type);
   void unregister_tracer(struct tracer *type);
+ int is_tracing_stopped(void);
   
   extern unsigned long nsecs_to_usecs(unsigned long nsecs);
   
@@@ -533,20 -403,6 +426,6 @@@ static inline void __trace_stack(struc
   
   extern cycle_t ftrace_now(int cpu);
   
- #ifdef CONFIG_CONTEXT_SWITCH_TRACER
- typedef void
- (*tracer_switch_func_t)(void *private,
-                       void *__rq,
-                       struct task_struct *prev,
-                       struct task_struct *next);
- 
- struct tracer_switch_ops {
-       tracer_switch_func_t            func;
-       void                            *private;
-       struct tracer_switch_ops        *next;
- };
- #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
- 
   extern void trace_find_cmdline(int pid, char comm[]);
   
   #ifdef CONFIG_DYNAMIC_FTRACE
@@@ -661,6 -517,41 +540,41 @@@ static inline int ftrace_trace_task(str
   }
   #endif
   
+ /*
+  * struct trace_parser - serves for reading the user input separated by spaces
+  * @cont: set if the input is not complete - no final space char was found
+  * @buffer: holds the parsed user input
+  * @idx: user input length
+  * @size: buffer size
+  */
+ struct trace_parser {
+       bool            cont;
+       char            *buffer;
+       unsigned        idx;
+       unsigned        size;
+ };
+ 
+ static inline bool trace_parser_loaded(struct trace_parser *parser)
+ {
+       return (parser->idx != 0);
+ }
+ 
+ static inline bool trace_parser_cont(struct trace_parser *parser)
+ {
+       return parser->cont;
+ }
+ 
+ static inline void trace_parser_clear(struct trace_parser *parser)
+ {
+       parser->cont = false;
+       parser->idx = 0;
+ }
+ 
+ extern int trace_parser_get_init(struct trace_parser *parser, int size);
+ extern void trace_parser_put(struct trace_parser *parser);
+ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
+       size_t cnt, loff_t *ppos);
+ 
   /*
    * trace_iterator_flags is an enumeration that defines bit
    * positions into trace_flags that controls the output.
@@@ -847,58 -738,18 +761,18 @@@ filter_check_discard(struct ftrace_even
         return 0;
   }
   
- #define DEFINE_COMPARISON_PRED(type)                                  \
- static int filter_pred_##type(struct filter_pred *pred, void *event,  \
-                             int val1, int val2)                       \
- {                                                                     \
-       type *addr = (type *)(event + pred->offset);                    \
-       type val = (type)pred->val;                                     \
-       int match = 0;                                                  \
-                                                                       \
-       switch (pred->op) {                                             \
-       case OP_LT:                                                     \
-               match = (*addr < val);                                  \
-               break;                                                  \
-       case OP_LE:                                                     \
-               match = (*addr <= val);                                 \
-               break;                                                  \
-       case OP_GT:                                                     \
-               match = (*addr > val);                                  \
-               break;                                                  \
-       case OP_GE:                                                     \
-               match = (*addr >= val);                                 \
-               break;                                                  \
-       default:                                                        \
-               break;                                                  \
-       }                                                               \
-                                                                       \
-       return match;                                                   \
- }
- 
- #define DEFINE_EQUALITY_PRED(size)                                    \
- static int filter_pred_##size(struct filter_pred *pred, void *event,  \
-                             int val1, int val2)                       \
- {                                                                     \
-       u##size *addr = (u##size *)(event + pred->offset);              \
-       u##size val = (u##size)pred->val;                               \
-       int match;                                                      \
-                                                                       \
-       match = (val == *addr) ^ pred->not;                             \
-                                                                       \
-       return match;                                                   \
- }
- 
   extern struct mutex event_mutex;
   extern struct list_head ftrace_events;
   
   extern const char *__start___trace_bprintk_fmt[];
   extern const char *__stop___trace_bprintk_fmt[];
   
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)    \
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(call, struct_name, id, tstruct, print)           \
         extern struct ftrace_event_call event_##call;
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
- #include "trace_event_types.h"
+ #undef FTRACE_ENTRY_DUP
+ #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print)               \
+       FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
+ #include "trace_entries.h"
   
   #endif /* _LINUX_KERNEL_TRACE_H */
diff --combined kernel/trace/trace_event_profile.c

index 11ba5bb4ed0a71f0c3da5e8a2ce73471456aec47,dd44b8768867f4e312d8792a98f5b38c015c605a..e812f1c1264cffb4ca36803dd0b11fc87f90d450
--- 1/kernel/trace/trace_event_profile.c
--- 2/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@@ -5,8 -5,60 +5,60 @@@
    *
    */
   
+ #include <linux/module.h>
   #include "trace.h"
   
- -      ret = event->profile_enable();
+ /*
+  * We can't use a size but a type in alloc_percpu()
+  * So let's create a dummy type that matches the desired size
+  */
+ typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
+ 
+ char          *trace_profile_buf;
+ EXPORT_SYMBOL_GPL(trace_profile_buf);
+ 
+ char          *trace_profile_buf_nmi;
+ EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+ 
+ /* Count the events in use (per event id, not per instance) */
+ static int    total_profile_count;
+ 
+ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
+ {
+       char *buf;
+       int ret = -ENOMEM;
+ 
+       if (atomic_inc_return(&event->profile_count))
+               return 0;
+ 
+       if (!total_profile_count++) {
+               buf = (char *)alloc_percpu(profile_buf_t);
+               if (!buf)
+                       goto fail_buf;
+ 
+               rcu_assign_pointer(trace_profile_buf, buf);
+ 
+               buf = (char *)alloc_percpu(profile_buf_t);
+               if (!buf)
+                       goto fail_buf_nmi;
+ 
+               rcu_assign_pointer(trace_profile_buf_nmi, buf);
+       }
+ 
++      ret = event->profile_enable(event);
+       if (!ret)
+               return 0;
+ 
+       kfree(trace_profile_buf_nmi);
+ fail_buf_nmi:
+       kfree(trace_profile_buf);
+ fail_buf:
+       total_profile_count--;
+       atomic_dec(&event->profile_count);
+ 
+       return ret;
+ }
+ 
   int ftrace_profile_enable(int event_id)
   {
         struct ftrace_event_call *event;
@@@ -14,8 -66,9 +66,9 @@@
   
         mutex_lock(&event_mutex);
         list_for_each_entry(event, &ftrace_events, list) {
-               if (event->id == event_id && event->profile_enable) {
-                       ret = event->profile_enable(event);
+               if (event->id == event_id && event->profile_enable &&
+                   try_module_get(event->mod)) {
+                       ret = ftrace_profile_enable_event(event);
                         break;
                 }
         }
@@@ -24,6 -77,33 +77,33 @@@
         return ret;
   }
   
- -      event->profile_disable();
+ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
+ {
+       char *buf, *nmi_buf;
+ 
+       if (!atomic_add_negative(-1, &event->profile_count))
+               return;
+ 
++      event->profile_disable(event);
+ 
+       if (!--total_profile_count) {
+               buf = trace_profile_buf;
+               rcu_assign_pointer(trace_profile_buf, NULL);
+ 
+               nmi_buf = trace_profile_buf_nmi;
+               rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+ 
+               /*
+                * Ensure every event in profiling has finished before
+                * releasing the buffers
+                */
+               synchronize_sched();
+ 
+               free_percpu(buf);
+               free_percpu(nmi_buf);
+       }
+ }
+ 
   void ftrace_profile_disable(int event_id)
   {
         struct ftrace_event_call *event;
@@@ -31,7 -111,8 +111,8 @@@
         mutex_lock(&event_mutex);
         list_for_each_entry(event, &ftrace_events, list) {
                 if (event->id == event_id) {
-                       event->profile_disable(event);
+                       ftrace_profile_disable_event(event);
+                       module_put(event->mod);
                         break;
                 }
         }
diff --combined kernel/trace/trace_events.c

index f85b0f1cb9425b627564fc666ac57864f59c3b64,6f03c8a1105e925f7604b7aaeac21e6486cb7c4f..a4b7c9a9130cfbacebf4fb1578ad3cae6c28bc9f
--- 1/kernel/trace/trace_events.c
--- 2/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@@ -21,6 -21,7 +21,7 @@@
   
   #include "trace_output.h"
   
+ #undef TRACE_SYSTEM
   #define TRACE_SYSTEM "TRACE_SYSTEM"
   
   DEFINE_MUTEX(event_mutex);
@@@ -86,13 -87,15 +87,13 @@@ int trace_define_common_fields(struct f
         __common_field(unsigned char, flags);
         __common_field(unsigned char, preempt_count);
         __common_field(int, pid);
-       __common_field(int, tgid);
+       __common_field(int, lock_depth);
   
         return ret;
   }
   EXPORT_SYMBOL_GPL(trace_define_common_fields);
   
- -#ifdef CONFIG_MODULES
- -
- -static void trace_destroy_fields(struct ftrace_event_call *call)
+ +void trace_destroy_fields(struct ftrace_event_call *call)
   {
         struct ftrace_event_field *field, *next;
   
@@@ -104,6 -107,8 +105,6 @@@
         }
   }
   
- -#endif /* CONFIG_MODULES */
- -
   static void ftrace_event_enable_disable(struct ftrace_event_call *call,
                                         int enable)
   {
@@@ -112,14 -117,14 +113,14 @@@
                 if (call->enabled) {
                         call->enabled = 0;
                         tracing_stop_cmdline_record();
- -                      call->unregfunc(call->data);
+ +                      call->unregfunc(call);
                 }
                 break;
         case 1:
                 if (!call->enabled) {
                         call->enabled = 1;
                         tracing_start_cmdline_record();
- -                      call->regfunc(call->data);
+ +                      call->regfunc(call);
                 }
                 break;
         }
@@@ -226,11 -231,9 +227,9 @@@ static ssize_
   ftrace_event_write(struct file *file, const char __user *ubuf,
                    size_t cnt, loff_t *ppos)
   {
+       struct trace_parser parser;
         size_t read = 0;
-       int i, set = 1;
         ssize_t ret;
-       char *buf;
-       char ch;
   
         if (!cnt || cnt < 0)
                 return 0;
@@@ -239,60 -242,28 +238,28 @@@
         if (ret < 0)
                 return ret;
   
-       ret = get_user(ch, ubuf++);
-       if (ret)
-               return ret;
-       read++;
-       cnt--;
- 
-       /* skip white space */
-       while (cnt && isspace(ch)) {
-               ret = get_user(ch, ubuf++);
-               if (ret)
-                       return ret;
-               read++;
-               cnt--;
-       }
- 
-       /* Only white space found? */
-       if (isspace(ch)) {
-               file->f_pos += read;
-               ret = read;
-               return ret;
-       }
- 
-       buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
-       if (!buf)
+       if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
                 return -ENOMEM;
   
-       if (cnt > EVENT_BUF_SIZE)
-               cnt = EVENT_BUF_SIZE;
+       read = trace_get_user(&parser, ubuf, cnt, ppos);
+ 
+       if (trace_parser_loaded((&parser))) {
+               int set = 1;
   
-       i = 0;
-       while (cnt && !isspace(ch)) {
-               if (!i && ch == '!')
+               if (*parser.buffer == '!')
                         set = 0;
-               else
-                       buf[i++] = ch;
   
-               ret = get_user(ch, ubuf++);
+               parser.buffer[parser.idx] = 0;
+ 
+               ret = ftrace_set_clr_event(parser.buffer + !set, set);
                 if (ret)
-                       goto out_free;
-               read++;
-               cnt--;
+                       goto out_put;
         }
-       buf[i] = 0;
- 
-       file->f_pos += read;
- 
-       ret = ftrace_set_clr_event(buf, set);
-       if (ret)
-               goto out_free;
   
         ret = read;
   
-  out_free:
-       kfree(buf);
+  out_put:
+       trace_parser_put(&parser);
   
         return ret;
   }
@@@ -300,42 -271,32 +267,32 @@@
   static void *
   t_next(struct seq_file *m, void *v, loff_t *pos)
   {
-       struct list_head *list = m->private;
-       struct ftrace_event_call *call;
+       struct ftrace_event_call *call = v;
   
         (*pos)++;
   
-       for (;;) {
-               if (list == &ftrace_events)
-                       return NULL;
- 
-               call = list_entry(list, struct ftrace_event_call, list);
- 
+       list_for_each_entry_continue(call, &ftrace_events, list) {
                 /*
                  * The ftrace subsystem is for showing formats only.
                  * They can not be enabled or disabled via the event files.
                  */
                 if (call->regfunc)
-                       break;
- 
-               list = list->next;
+                       return call;
         }
   
-       m->private = list->next;
- 
-       return call;
+       return NULL;
   }
   
   static void *t_start(struct seq_file *m, loff_t *pos)
   {
-       struct ftrace_event_call *call = NULL;
+       struct ftrace_event_call *call;
         loff_t l;
   
         mutex_lock(&event_mutex);
   
-       m->private = ftrace_events.next;
+       call = list_entry(&ftrace_events, struct ftrace_event_call, list);
         for (l = 0; l <= *pos; ) {
-               call = t_next(m, NULL, &l);
+               call = t_next(m, call, &l);
                 if (!call)
                         break;
         }
@@@ -345,37 -306,28 +302,28 @@@
   static void *
   s_next(struct seq_file *m, void *v, loff_t *pos)
   {
-       struct list_head *list = m->private;
-       struct ftrace_event_call *call;
+       struct ftrace_event_call *call = v;
   
         (*pos)++;
   
-  retry:
-       if (list == &ftrace_events)
-               return NULL;
- 
-       call = list_entry(list, struct ftrace_event_call, list);
- 
-       if (!call->enabled) {
-               list = list->next;
-               goto retry;
+       list_for_each_entry_continue(call, &ftrace_events, list) {
+               if (call->enabled)
+                       return call;
         }
   
-       m->private = list->next;
- 
-       return call;
+       return NULL;
   }
   
   static void *s_start(struct seq_file *m, loff_t *pos)
   {
-       struct ftrace_event_call *call = NULL;
+       struct ftrace_event_call *call;
         loff_t l;
   
         mutex_lock(&event_mutex);
   
-       m->private = ftrace_events.next;
+       call = list_entry(&ftrace_events, struct ftrace_event_call, list);
         for (l = 0; l <= *pos; ) {
-               call = s_next(m, NULL, &l);
+               call = s_next(m, call, &l);
                 if (!call)
                         break;
         }
@@@ -574,7 -526,7 +522,7 @@@ static int trace_write_header(struct tr
                                 FIELD(unsigned char, flags),
                                 FIELD(unsigned char, preempt_count),
                                 FIELD(int, pid),
-                               FIELD(int, tgid));
+                               FIELD(int, lock_depth));
   }
   
   static ssize_t
@@@ -987,46 -939,27 +935,46 @@@ event_create_dir(struct ftrace_event_ca
         return 0;
   }
   
- -#define for_each_event(event, start, end)                     \
- -      for (event = start;                                     \
- -           (unsigned long)event < (unsigned long)end;         \
- -           event++)
+ +static int __trace_add_event_call(struct ftrace_event_call *call)
+ +{
+ +      struct dentry *d_events;
+ +      int ret;
   
- -#ifdef CONFIG_MODULES
+ +      if (!call->name)
+ +              return -EINVAL;
   
- -static LIST_HEAD(ftrace_module_file_list);
+ +      if (call->raw_init) {
+ +              ret = call->raw_init(call);
+ +              if (ret < 0) {
+ +                      if (ret != -ENOSYS)
+ +                              pr_warning("Could not initialize trace "
+ +                              "events/%s\n", call->name);
+ +                      return ret;
+ +              }
+ +      }
   
- -/*
- - * Modules must own their file_operations to keep up with
- - * reference counting.
- - */
- -struct ftrace_module_file_ops {
- -      struct list_head                list;
- -      struct module                   *mod;
- -      struct file_operations          id;
- -      struct file_operations          enable;
- -      struct file_operations          format;
- -      struct file_operations          filter;
- -};
+ +      d_events = event_trace_events_dir();
+ +      if (!d_events)
+ +              return -ENOENT;
+ +
+ +      list_add(&call->list, &ftrace_events);
+ +      ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
+ +                              &ftrace_enable_fops, &ftrace_event_filter_fops,
+ +                              &ftrace_event_format_fops);
+ +      if (ret < 0)
+ +              list_del(&call->list);
+ +      return ret;
+ +}
+ +
+ +/* Add an additional event_call dynamically */
+ +int trace_add_event_call(struct ftrace_event_call *call)
+ +{
+ +      int ret;
+ +      mutex_lock(&event_mutex);
+ +      ret = __trace_add_event_call(call);
+ +      mutex_unlock(&event_mutex);
+ +      return ret;
+ +}
   
   static void remove_subsystem_dir(const char *name)
   {
@@@ -1054,53 -987,6 +1002,53 @@@
         }
   }
   
+ +/*
+ + * Must be called with both event_mutex and trace_event_mutex held.
+ + */
+ +static void __trace_remove_event_call(struct ftrace_event_call *call)
+ +{
+ +      ftrace_event_enable_disable(call, 0);
+ +      if (call->event)
+ +              __unregister_ftrace_event(call->event);
+ +      debugfs_remove_recursive(call->dir);
+ +      list_del(&call->list);
+ +      trace_destroy_fields(call);
+ +      destroy_preds(call);
+ +      remove_subsystem_dir(call->system);
+ +}
+ +
+ +/* Remove an event_call */
+ +void trace_remove_event_call(struct ftrace_event_call *call)
+ +{
+ +      mutex_lock(&event_mutex);
+ +      down_write(&trace_event_mutex);
+ +      __trace_remove_event_call(call);
+ +      up_write(&trace_event_mutex);
+ +      mutex_unlock(&event_mutex);
+ +}
+ +
+ +#define for_each_event(event, start, end)                     \
+ +      for (event = start;                                     \
+ +           (unsigned long)event < (unsigned long)end;         \
+ +           event++)
+ +
+ +#ifdef CONFIG_MODULES
+ +
+ +static LIST_HEAD(ftrace_module_file_list);
+ +
+ +/*
+ + * Modules must own their file_operations to keep up with
+ + * reference counting.
+ + */
+ +struct ftrace_module_file_ops {
+ +      struct list_head                list;
+ +      struct module                   *mod;
+ +      struct file_operations          id;
+ +      struct file_operations          enable;
+ +      struct file_operations          format;
+ +      struct file_operations          filter;
+ +};
+ +
   static struct ftrace_module_file_ops *
   trace_create_file_ops(struct module *mod)
   {
@@@ -1158,7 -1044,7 +1106,7 @@@ static void trace_module_add_events(str
                 if (!call->name)
                         continue;
                 if (call->raw_init) {
- -                      ret = call->raw_init();
+ +                      ret = call->raw_init(call);
                         if (ret < 0) {
                                 if (ret != -ENOSYS)
                                         pr_warning("Could not initialize trace "
@@@ -1193,7 -1079,14 +1141,7 @@@ static void trace_module_remove_events(
         list_for_each_entry_safe(call, p, &ftrace_events, list) {
                 if (call->mod == mod) {
                         found = true;
- -                      ftrace_event_enable_disable(call, 0);
- -                      if (call->event)
- -                              __unregister_ftrace_event(call->event);
- -                      debugfs_remove_recursive(call->dir);
- -                      list_del(&call->list);
- -                      trace_destroy_fields(call);
- -                      destroy_preds(call);
- -                      remove_subsystem_dir(call->system);
+ +                      __trace_remove_event_call(call);
                 }
         }
   
@@@ -1242,7 -1135,7 +1190,7 @@@ static int trace_module_notify(struct n
   }
   #endif /* CONFIG_MODULES */
   
- struct notifier_block trace_module_nb = {
+ static struct notifier_block trace_module_nb = {
         .notifier_call = trace_module_notify,
         .priority = 0,
   };
@@@ -1311,7 -1204,7 +1259,7 @@@ static __init int event_trace_init(void
                 if (!call->name)
                         continue;
                 if (call->raw_init) {
- -                      ret = call->raw_init();
+ +                      ret = call->raw_init(call);
                         if (ret < 0) {
                                 if (ret != -ENOSYS)
                                         pr_warning("Could not initialize trace "
@@@ -1414,6 -1307,18 +1362,18 @@@ static __init void event_trace_self_tes
                 if (!call->regfunc)
                         continue;
   
+ /*
+  * Testing syscall events here is pretty useless, but
+  * we still do it if configured. But this is time consuming.
+  * What we really need is a user thread to perform the
+  * syscalls as we test.
+  */
+ #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
+               if (call->system &&
+                   strcmp(call->system, "syscalls") == 0)
+                       continue;
+ #endif
+ 
                 pr_info("Testing event %s: ", call->name);
   
                 /*
@@@ -1487,7 -1392,7 +1447,7 @@@
   
   #ifdef CONFIG_FUNCTION_TRACER
   
- static DEFINE_PER_CPU(atomic_t, test_event_disable);
+ static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
   
   static void
   function_test_events_call(unsigned long ip, unsigned long parent_ip)
@@@ -1504,7 -1409,7 +1464,7 @@@
         pc = preempt_count();
         resched = ftrace_preempt_disable();
         cpu = raw_smp_processor_id();
-       disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
+       disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
   
         if (disabled != 1)
                 goto out;
@@@ -1523,7 -1428,7 +1483,7 @@@
         trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
   
    out:
-       atomic_dec(&per_cpu(test_event_disable, cpu));
+       atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
         ftrace_preempt_enable(resched);
   }
   
diff --combined kernel/trace/trace_export.c

index a79ef6f193c0460908ccc7526c18e9f500206cd2,9753fcc61bc55b4577527f1ef8ef4853cba7406f..ed7d480835201c1c7a9003f2c1535e7d3e827f1f
--- 1/kernel/trace/trace_export.c
--- 2/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@@ -15,147 -15,125 +15,124 @@@
   
   #include "trace_output.h"
   
+ #undef TRACE_SYSTEM
+ #define TRACE_SYSTEM  ftrace
   
- #undef TRACE_STRUCT
- #define TRACE_STRUCT(args...) args
+ /* not needed for this file */
+ #undef __field_struct
+ #define __field_struct(type, item)
   
- extern void __bad_type_size(void);
+ #undef __field
+ #define __field(type, item)                           type item;
   
- #undef TRACE_FIELD
- #define TRACE_FIELD(type, item, assign)                                       \
-       if (sizeof(type) != sizeof(field.item))                         \
-               __bad_type_size();                                      \
+ #undef __field_desc
+ #define __field_desc(type, container, item)           type item;
+ 
+ #undef __array
+ #define __array(type, item, size)                     type item[size];
+ 
+ #undef __array_desc
+ #define __array_desc(type, container, item, size)     type item[size];
+ 
+ #undef __dynamic_array
+ #define __dynamic_array(type, item)                   type item[];
+ 
+ #undef F_STRUCT
+ #define F_STRUCT(args...)                             args
+ 
+ #undef F_printk
+ #define F_printk(fmt, args...) fmt, args
+ 
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print)   \
+ struct ____ftrace_##name {                                    \
+       tstruct                                                 \
+ };                                                            \
+ static void __used ____ftrace_check_##name(void)              \
+ {                                                             \
+       struct ____ftrace_##name *__entry = NULL;               \
+                                                               \
+       /* force compile-time check on F_printk() */            \
+       printk(print);                                          \
+ }
+ 
+ #undef FTRACE_ENTRY_DUP
+ #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print)       \
+       FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+ 
+ #include "trace_entries.h"
+ 
+ 
+ #undef __field
+ #define __field(type, item)                                           \
         ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
-                              "offset:%u;\tsize:%u;\n",                \
-                              (unsigned int)offsetof(typeof(field), item), \
-                              (unsigned int)sizeof(field.item));       \
+                              "offset:%zu;\tsize:%zu;\n",              \
+                              offsetof(typeof(field), item),           \
+                              sizeof(field.item));                     \
         if (!ret)                                                       \
                 return 0;
   
+ #undef __field_desc
+ #define __field_desc(type, container, item)                           \
+       ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
+                              "offset:%zu;\tsize:%zu;\n",              \
+                              offsetof(typeof(field), container.item), \
+                              sizeof(field.container.item));           \
+       if (!ret)                                                       \
+               return 0;
   
- #undef TRACE_FIELD_SPECIAL
- #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)                        \
-       ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t"   \
-                              "offset:%u;\tsize:%u;\n",                \
-                              (unsigned int)offsetof(typeof(field), item), \
-                              (unsigned int)sizeof(field.item));       \
+ #undef __array
+ #define __array(type, item, len)                                      \
+       ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
+                              "offset:%zu;\tsize:%zu;\n",              \
+                              offsetof(typeof(field), item),   \
+                              sizeof(field.item));             \
         if (!ret)                                                       \
                 return 0;
   
- #undef TRACE_FIELD_ZERO
- #define TRACE_FIELD_ZERO(type, item)                                  \
-       ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
-                              "offset:%u;\tsize:0;\n",                 \
-                              (unsigned int)offsetof(typeof(field), item)); \
+ #undef __array_desc
+ #define __array_desc(type, container, item, len)                      \
+       ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
+                              "offset:%zu;\tsize:%zu;\n",              \
+                              offsetof(typeof(field), container.item), \
+                              sizeof(field.container.item));           \
         if (!ret)                                                       \
                 return 0;
   
- #undef TRACE_FIELD_SIGN
- #define TRACE_FIELD_SIGN(type, item, assign, is_signed)       \
-       TRACE_FIELD(type, item, assign)
+ #undef __dynamic_array
+ #define __dynamic_array(type, item)                                   \
+       ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
+                              "offset:%zu;\tsize:0;\n",                \
+                              offsetof(typeof(field), item));          \
+       if (!ret)                                                       \
+               return 0;
   
- #undef TP_RAW_FMT
- #define TP_RAW_FMT(args...) args
+ #undef F_printk
+ #define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
   
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)    \
- static int                                                            \
- ftrace_format_##call(struct ftrace_event_call *unused,                        \
-                     struct trace_seq *s)                              \
- {                                                                     \
-       struct args field;                                              \
-       int ret;                                                        \
-                                                                       \
-       tstruct;                                                        \
-                                                                       \
-       trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt);            \
-                                                                       \
-       return ret;                                                     \
- }
+ #undef __entry
+ #define __entry REC
   
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,  \
-                                   tpfmt)                              \
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print)           \
   static int                                                            \
- ftrace_format_##call(struct ftrace_event_call *unused,                        \
-                     struct trace_seq *s)                              \
+ ftrace_format_##name(struct ftrace_event_call *unused,                        \
+                    struct trace_seq *s)                               \
   {                                                                     \
-       struct args field;                                              \
-       int ret;                                                        \
+       struct struct_name field __attribute__((unused));               \
+       int ret = 0;                                                    \
                                                                         \
         tstruct;                                                        \
                                                                         \
-       trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt);            \
+       trace_seq_printf(s, "\nprint fmt: " print);                     \
                                                                         \
         return ret;                                                     \
   }
   
- #include "trace_event_types.h"
- 
- #undef TRACE_FIELD
- #define TRACE_FIELD(type, item, assign)\
-       entry->item = assign;
- 
- #undef TRACE_FIELD
- #define TRACE_FIELD(type, item, assign)\
-       entry->item = assign;
- 
- #undef TRACE_FIELD_SIGN
- #define TRACE_FIELD_SIGN(type, item, assign, is_signed)       \
-       TRACE_FIELD(type, item, assign)
- 
- #undef TRACE_FIELD_ZERO
- #define TRACE_FIELD_ZERO(type, item)
- 
- #undef TP_CMD
- #define TP_CMD(cmd...)        cmd
- 
- #undef TRACE_ENTRY
- #define TRACE_ENTRY   entry
- 
- #undef TRACE_FIELD_SPECIAL
- #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)        \
-       cmd;
- 
- static int ftrace_raw_init_event(struct ftrace_event_call *event_call)
- {
-       INIT_LIST_HEAD(&event_call->fields);
- 
-       return 0;
- }
- 
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)    \
- int ftrace_define_fields_##call(struct ftrace_event_call *event_call);        \
-                                                                       \
- struct ftrace_event_call __used                                               \
- __attribute__((__aligned__(4)))                                               \
- __attribute__((section("_ftrace_events"))) event_##call = {           \
-       .name                   = #call,                                \
-       .id                     = proto,                                \
-       .system                 = __stringify(TRACE_SYSTEM),            \
-       .raw_init               = ftrace_raw_init_event,                \
-       .show_format            = ftrace_format_##call,                 \
-       .define_fields          = ftrace_define_fields_##call,          \
- };                                                                    \
- 
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,  \
-                                   tpfmt)                              \
-                                                                       \
- struct ftrace_event_call __used                                               \
- __attribute__((__aligned__(4)))                                               \
- __attribute__((section("_ftrace_events"))) event_##call = {           \
-       .name                   = #call,                                \
-       .id                     = proto,                                \
-       .system                 = __stringify(TRACE_SYSTEM),            \
-       .show_format            = ftrace_format_##call,                 \
- };
- 
- #include "trace_event_types.h"
+ #include "trace_entries.h"
   
- #undef TRACE_FIELD
- #define TRACE_FIELD(type, item, assign)                                       \
- -
+ #undef __field
+ #define __field(type, item)                                           \
         ret = trace_define_field(event_call, #type, #item,              \
                                  offsetof(typeof(field), item),         \
                                  sizeof(field.item),                    \
@@@ -163,32 -141,45 +140,45 @@@
         if (ret)                                                        \
                 return ret;
   
- #undef TRACE_FIELD_SPECIAL
- #define TRACE_FIELD_SPECIAL(type, item, len, cmd)                     \
+ #undef __field_desc
+ #define __field_desc(type, container, item)   \
+       ret = trace_define_field(event_call, #type, #item,              \
+                                offsetof(typeof(field),                \
+                                         container.item),              \
+                                sizeof(field.container.item),          \
+                                is_signed_type(type), FILTER_OTHER);   \
+       if (ret)                                                        \
+               return ret;
+ 
+ #undef __array
+ #define __array(type, item, len)                                      \
+       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
         ret = trace_define_field(event_call, #type "[" #len "]", #item, \
                                  offsetof(typeof(field), item),         \
                                  sizeof(field.item), 0, FILTER_OTHER);  \
         if (ret)                                                        \
                 return ret;
   
- #undef TRACE_FIELD_SIGN
- #define TRACE_FIELD_SIGN(type, item, assign, is_signed)                       \
-       ret = trace_define_field(event_call, #type, #item,              \
-                                offsetof(typeof(field), item),         \
-                                sizeof(field.item), is_signed,         \
+ #undef __array_desc
+ #define __array_desc(type, container, item, len)                      \
+       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
+       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+                                offsetof(typeof(field),                \
+                                         container.item),              \
+                                sizeof(field.container.item), 0,       \
                                  FILTER_OTHER);                         \
         if (ret)                                                        \
                 return ret;
   
- #undef TRACE_FIELD_ZERO
- #define TRACE_FIELD_ZERO(type, item)
+ #undef __dynamic_array
+ #define __dynamic_array(type, item)
   
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)    \
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print)           \
   int                                                                   \
- ftrace_define_fields_##call(struct ftrace_event_call *event_call)     \
+ ftrace_define_fields_##name(struct ftrace_event_call *event_call)     \
   {                                                                     \
-       struct args field;                                              \
+       struct struct_name field;                                       \
         int ret;                                                        \
                                                                         \
         ret = trace_define_common_fields(event_call);                   \
@@@ -200,8 -191,42 +190,41 @@@
         return ret;                                                     \
   }
   
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,  \
-                                   tpfmt)
+ #include "trace_entries.h"
+ 
++static int ftrace_raw_init_event(struct ftrace_event_call *call)
++{
++      INIT_LIST_HEAD(&call->fields);  /* start with an empty field list */
++      return 0;       /* this initialiser has no failure path */
++}
+ 
+ #undef __field
+ #define __field(type, item)
+ 
+ #undef __field_desc
+ #define __field_desc(type, container, item)
+ 
+ #undef __array
+ #define __array(type, item, len)
+ 
+ #undef __array_desc
+ #define __array_desc(type, container, item, len)
+ 
+ #undef __dynamic_array
+ #define __dynamic_array(type, item)
+ 
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(call, struct_name, type, tstruct, print)         \
- -static int ftrace_raw_init_event_##call(void);                                \
+                                                                       \
+ struct ftrace_event_call __used                                               \
+ __attribute__((__aligned__(4)))                                               \
+ __attribute__((section("_ftrace_events"))) event_##call = {           \
+       .name                   = #call,                                \
+       .id                     = type,                                 \
+       .system                 = __stringify(TRACE_SYSTEM),            \
- -      .raw_init               = ftrace_raw_init_event_##call,         \
++      .raw_init               = ftrace_raw_init_event,                \
+       .show_format            = ftrace_format_##call,                 \
+       .define_fields          = ftrace_define_fields_##call,          \
+ };                                                                    \
- -static int ftrace_raw_init_event_##call(void)                         \
- -{                                                                     \
- -      INIT_LIST_HEAD(&event_##call.fields);                           \
- -      return 0;                                                       \
- -}                                                                     \
   
- #include "trace_event_types.h"
+ #include "trace_entries.h"
diff --combined kernel/trace/trace_kprobe.c

index f6821f16227e0f1194abe4c174e1e0923beae87f,0000000000000000000000000000000000000000..09cba270392df3a2fed08da801541af7d9686bfe

mode 100644,000000..100644
--- 1/kernel/trace/trace_kprobe.c
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@@ -1,1392 -1,0 +1,1389 @@@
- #include <linux/perf_counter.h>
+ +/*
+ + * kprobe based kernel tracer
+ + *
+ + * Created by Masami Hiramatsu <mhiramat@redhat.com>
+ + *
+ + * This program is free software; you can redistribute it and/or modify
+ + * it under the terms of the GNU General Public License version 2 as
+ + * published by the Free Software Foundation.
+ + *
+ + * This program is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ + * GNU General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU General Public License
+ + * along with this program; if not, write to the Free Software
+ + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ + */
+ +
+ +#include <linux/module.h>
+ +#include <linux/uaccess.h>
+ +#include <linux/kprobes.h>
+ +#include <linux/seq_file.h>
+ +#include <linux/slab.h>
+ +#include <linux/smp.h>
+ +#include <linux/debugfs.h>
+ +#include <linux/types.h>
+ +#include <linux/string.h>
+ +#include <linux/ctype.h>
+ +#include <linux/ptrace.h>
-               perf_tpcounter_event(call->id, entry->ip, 1, entry, size);
++#include <linux/perf_event.h>
+ +
+ +#include "trace.h"
+ +#include "trace_output.h"
+ +
+ +#define MAX_TRACE_ARGS 128
+ +#define MAX_ARGSTR_LEN 63
+ +#define MAX_EVENT_NAME_LEN 64
+ +#define KPROBE_EVENT_SYSTEM "kprobes"
+ +
+ +/* currently, trace_kprobe only supports X86. */
+ +
+ +struct fetch_func {
+ +      unsigned long (*func)(struct pt_regs *, void *);        /* fetch handler */
+ +      void *data;     /* passed to func as its second argument */
+ +};
+ +
+ +/* Invoke the handler stored in @f on @regs, handing it f->data. */
+ +static __kprobes unsigned long call_fetch(struct fetch_func *f,
+ +                                        struct pt_regs *regs)
+ +{
+ +      void *cookie = f->data;
+ +
+ +      return f->func(regs, cookie);
+ +}
+ +
+ +/* fetch handlers */
+ +/* Read a register from @regs; @offset carries the register offset. */
+ +static __kprobes unsigned long fetch_register(struct pt_regs *regs,
+ +                                            void *offset)
+ +{
+ +      unsigned int reg_offset = (unsigned int)((unsigned long)offset);
+ +
+ +      return regs_get_register(regs, reg_offset);
+ +}
+ +
+ +/* Read the Nth entry of the kernel stack; @num carries N as an integer. */
+ +static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
+ +                                         void *num)
+ +{
+ +      unsigned int nth = (unsigned int)((unsigned long)num);
+ +
+ +      return regs_get_kernel_stack_nth(regs, nth);
+ +}
+ +
+ +/*
+ + * Read one unsigned long from kernel address @addr.
+ + * Yields 0 when probe_kernel_address() reports a failure.
+ + */
+ +static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
+ +{
+ +      unsigned long value;
+ +
+ +      if (!probe_kernel_address(addr, value))
+ +              return value;
+ +
+ +      return 0;
+ +}
+ +
+ +/* Read the Nth function argument; @num carries N as an integer. */
+ +static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
+ +{
+ +      unsigned int nth = (unsigned int)((unsigned long)num);
+ +
+ +      return regs_get_argument_nth(regs, nth);
+ +}
+ +
+ +/* Read the return value recorded in @regs; @dummy is ignored. */
+ +static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
+ +                                            void *dummy)
+ +{
+ +      unsigned long retval = regs_return_value(regs);
+ +
+ +      return retval;
+ +}
+ +
+ +/* Read the instruction pointer stored in @regs; @dummy is ignored. */
+ +static __kprobes unsigned long fetch_ip(struct pt_regs *regs, void *dummy)
+ +{
+ +      unsigned long ip = instruction_pointer(regs);
+ +
+ +      return ip;
+ +}
+ +
+ +/* Read the kernel stack pointer stored in @regs; @dummy is ignored. */
+ +static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
+ +                                                 void *dummy)
+ +{
+ +      unsigned long sp = kernel_stack_pointer(regs);
+ +
+ +      return sp;
+ +}
+ +
+ +/* Memory fetching by symbol */
+ +struct symbol_cache {
+ +      char *symbol;   /* kstrdup()ed symbol name, owned by the cache */
+ +      long offset;    /* added to the looked-up symbol address */
+ +      unsigned long addr;     /* resolved address + offset; 0 if lookup failed */
+ +};
+ +
+ +/*
+ + * Resolve sc->symbol through kallsyms again and refresh sc->addr.
+ + * Returns the new sc->addr, which stays 0 when the symbol is not found.
+ + */
+ +static unsigned long update_symbol_cache(struct symbol_cache *sc)
+ +{
+ +      unsigned long base = (unsigned long)kallsyms_lookup_name(sc->symbol);
+ +
+ +      sc->addr = base ? base + sc->offset : 0;
+ +      return sc->addr;
+ +}
+ +
+ +/* Release a symbol cache together with the symbol name it owns. */
+ +static void free_symbol_cache(struct symbol_cache *sc)
+ +{
+ +      char *name = sc->symbol;
+ +
+ +      kfree(name);
+ +      kfree(sc);
+ +}
+ +
+ +/*
+ + * Build a symbol cache for @sym with @offset and resolve it once.
+ + * Returns NULL for a missing/empty name or when an allocation fails.
+ + */
+ +static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
+ +{
+ +      struct symbol_cache *cache;
+ +
+ +      if (!sym || sym[0] == '\0')
+ +              return NULL;
+ +
+ +      cache = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
+ +      if (!cache)
+ +              return NULL;
+ +
+ +      cache->symbol = kstrdup(sym, GFP_KERNEL);
+ +      if (cache->symbol) {
+ +              cache->offset = offset;
+ +              update_symbol_cache(cache);
+ +              return cache;
+ +      }
+ +
+ +      /* Name duplication failed: undo the container allocation. */
+ +      kfree(cache);
+ +      return NULL;
+ +}
+ +
+ +/*
+ + * Read memory at the address cached in the symbol_cache @data.
+ + * Yields 0 while the symbol is unresolved (cached address is 0).
+ + */
+ +static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
+ +{
+ +      struct symbol_cache *cache = data;
+ +
+ +      if (!cache->addr)
+ +              return 0;
+ +
+ +      return fetch_memory(regs, (void *)cache->addr);
+ +}
+ +
+ +/* Special indirect memory access interface */
+ +struct indirect_fetch_data {
+ +      struct fetch_func orig; /* inner fetch that produces the base address */
+ +      long offset;    /* added to the base address before dereferencing */
+ +};
+ +
+ +/*
+ + * Run the inner fetch to obtain a base address, then read memory at
+ + * base + offset.  A base of 0 short-circuits to a result of 0.
+ + */
+ +static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
+ +{
+ +      struct indirect_fetch_data *ind = data;
+ +      unsigned long base = call_fetch(&ind->orig, regs);
+ +
+ +      if (!base)
+ +              return 0;
+ +
+ +      return fetch_memory(regs, (void *)(base + ind->offset));
+ +}
+ +
+ +/*
+ + * Free an indirect fetch descriptor, first releasing whatever its inner
+ + * fetch owns (a nested indirect descriptor or a symbol cache).
+ + */
+ +static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
+ +{
+ +      struct fetch_func *inner = &data->orig;
+ +
+ +      if (inner->func == fetch_indirect)
+ +              free_indirect_fetch_data(inner->data);
+ +      else if (inner->func == fetch_symbol)
+ +              free_symbol_cache(inner->data);
+ +
+ +      kfree(data);
+ +}
+ +
+ +/**
+ + * Kprobe tracer core functions
+ + */
+ +
+ +struct probe_arg {
+ +      struct fetch_func       fetch;  /* how to obtain the value */
+ +      const char              *name;  /* kstrdup()ed argument name */
+ +};
+ +
+ +/* Flags for trace_probe */
+ +#define TP_FLAG_TRACE 1
+ +#define TP_FLAG_PROFILE       2
+ +
+ +struct trace_probe {
+ +      struct list_head        list;   /* link in probe_list */
+ +      struct kretprobe        rp;     /* Use rp.kp for kprobe use */
+ +      unsigned long           nhit;   /* hit count shown by the profile file */
+ +      unsigned int            flags;  /* For TP_FLAG_* */
+ +      const char              *symbol;        /* symbol name */
+ +      struct ftrace_event_call        call;   /* name/system are kstrdup()ed */
+ +      struct trace_event              event;
+ +      unsigned int            nr_args;        /* number of valid entries in args[] */
+ +      struct probe_arg        args[];
+ +};
+ +
+ +/* Bytes needed for a trace_probe carrying @n trailing probe_arg entries. */
+ +#define SIZEOF_TRACE_PROBE(n)                 \
+ +      (offsetof(struct trace_probe, args) +   \
+ +       ((n) * sizeof(struct probe_arg)))
+ +
+ +/* Non-zero when @tp is a return probe, i.e. its kretprobe handler is set. */
+ +static __kprobes int probe_is_return(struct trace_probe *tp)
+ +{
+ +      if (tp->rp.handler)
+ +              return 1;
+ +
+ +      return 0;
+ +}
+ +
+ +/* Symbol name of @tp, or the literal "unknown" when none was recorded. */
+ +static __kprobes const char *probe_symbol(struct trace_probe *tp)
+ +{
+ +      if (tp->symbol)
+ +              return tp->symbol;
+ +
+ +      return "unknown";
+ +}
+ +
+ +/*
+ + * Render the fetch method @ff back into its textual form in @buf.
+ + *
+ + * Returns the number of characters written (excluding the trailing NUL),
+ + * -EINVAL when @ff->func is not one of the known fetch handlers, or
+ + * -ENOSPC when the text does not fit into @n bytes.
+ + */
+ +static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
+ +{
+ +      int ret = -EINVAL;
+ +
+ +      if (ff->func == fetch_argument)
+ +              ret = snprintf(buf, n, "a%lu", (unsigned long)ff->data);
+ +      else if (ff->func == fetch_register) {
+ +              const char *name;
+ +              name = regs_query_register_name((unsigned int)((long)ff->data));
+ +              ret = snprintf(buf, n, "%%%s", name);
+ +      } else if (ff->func == fetch_stack)
+ +              ret = snprintf(buf, n, "s%lu", (unsigned long)ff->data);
+ +      else if (ff->func == fetch_memory)
+ +              ret = snprintf(buf, n, "@0x%p", ff->data);
+ +      else if (ff->func == fetch_symbol) {
+ +              struct symbol_cache *sc = ff->data;
+ +              ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
+ +      } else if (ff->func == fetch_retvalue)
+ +              ret = snprintf(buf, n, "rv");
+ +      else if (ff->func == fetch_ip)
+ +              ret = snprintf(buf, n, "ra");
+ +      else if (ff->func == fetch_stack_address)
+ +              ret = snprintf(buf, n, "sa");
+ +      else if (ff->func == fetch_indirect) {
+ +              struct indirect_fetch_data *id = ff->data;
+ +              size_t l = 0;
+ +              ret = snprintf(buf, n, "%+ld(", id->offset);
+ +              if (ret < 0 || (size_t)ret >= n)
+ +                      goto end;
+ +              l += ret;
+ +              /* The inner fetch is rendered recursively between the parens. */
+ +              ret = probe_arg_string(buf + l, n - l, &id->orig);
+ +              if (ret < 0)
+ +                      goto end;
+ +              l += ret;
+ +              ret = snprintf(buf + l, n - l, ")");
+ +              ret += l;
+ +      }
+ +end:
+ +      /*
+ +       * Test for errors before the size check: comparing a negative int
+ +       * with the size_t @n would promote it to a huge unsigned value and
+ +       * report every failure as -ENOSPC.
+ +       */
+ +      if (ret < 0)
+ +              return ret;
+ +      if ((size_t)ret >= n)
+ +              return -ENOSPC;
+ +      return ret;
+ +}
+ +
+ +static int register_probe_event(struct trace_probe *tp);
+ +static void unregister_probe_event(struct trace_probe *tp);
+ +
+ +static DEFINE_MUTEX(probe_lock);
+ +static LIST_HEAD(probe_list);
+ +
+ +static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
+ +static int kretprobe_dispatcher(struct kretprobe_instance *ri,
+ +                              struct pt_regs *regs);
+ +
+ +/*
+ + * Allocate new trace_probe and initialize it (including kprobes).
+ + *
+ + * The probe point is @symbol + @offs when @symbol is given, @addr
+ + * otherwise.  @nargs sizes the trailing args[] array and @is_return
+ + * selects the kretprobe handler instead of the kprobe pre-handler.
+ + * @group, @event and @symbol are duplicated, so the caller keeps
+ + * ownership of its strings.
+ + *
+ + * Returns the new probe, ERR_PTR(-EINVAL) when @event or @group is
+ + * missing, or ERR_PTR(-ENOMEM) when an allocation fails.
+ + */
+ +static struct trace_probe *alloc_trace_probe(const char *group,
+ +                                           const char *event,
+ +                                           void *addr,
+ +                                           const char *symbol,
+ +                                           unsigned long offs,
+ +                                           int nargs, int is_return)
+ +{
+ +      struct trace_probe *tp;
+ +      int ret = -ENOMEM;
+ +
+ +      tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
+ +      if (!tp)
+ +              return ERR_PTR(ret);
+ +
+ +      if (symbol) {
+ +              tp->symbol = kstrdup(symbol, GFP_KERNEL);
+ +              if (!tp->symbol)
+ +                      goto error;
+ +              tp->rp.kp.symbol_name = tp->symbol;
+ +              tp->rp.kp.offset = offs;
+ +      } else
+ +              tp->rp.kp.addr = addr;
+ +
+ +      if (is_return)
+ +              tp->rp.handler = kretprobe_dispatcher;
+ +      else
+ +              tp->rp.kp.pre_handler = kprobe_dispatcher;
+ +
+ +      /* Missing names are a caller error, not an allocation failure. */
+ +      if (!event || !group) {
+ +              ret = -EINVAL;
+ +              goto error;
+ +      }
+ +
+ +      tp->call.name = kstrdup(event, GFP_KERNEL);
+ +      if (!tp->call.name)
+ +              goto error;
+ +
+ +      tp->call.system = kstrdup(group, GFP_KERNEL);
+ +      if (!tp->call.system)
+ +              goto error;
+ +
+ +      INIT_LIST_HEAD(&tp->list);
+ +      return tp;
+ +error:
+ +      /* kzalloc() zeroed tp, so unset strings are NULL and safe to kfree. */
+ +      kfree(tp->call.name);
+ +      kfree(tp->symbol);
+ +      kfree(tp);
+ +      return ERR_PTR(ret);
+ +}
+ +
+ +/* Release the resources owned by one probe argument (not @arg itself). */
+ +static void free_probe_arg(struct probe_arg *arg)
+ +{
+ +      struct fetch_func *fetch = &arg->fetch;
+ +
+ +      /* Only symbol and indirect fetches carry heap-allocated data. */
+ +      if (fetch->func == fetch_indirect)
+ +              free_indirect_fetch_data(fetch->data);
+ +      else if (fetch->func == fetch_symbol)
+ +              free_symbol_cache(fetch->data);
+ +
+ +      kfree(arg->name);
+ +}
+ +
+ +/* Free @tp, its first nr_args arguments and the strings it owns. */
+ +static void free_trace_probe(struct trace_probe *tp)
+ +{
+ +      struct probe_arg *arg = tp->args;
+ +      struct probe_arg *end = arg + tp->nr_args;
+ +
+ +      while (arg < end)
+ +              free_probe_arg(arg++);
+ +
+ +      kfree(tp->call.system);
+ +      kfree(tp->call.name);
+ +      kfree(tp->symbol);
+ +      kfree(tp);
+ +}
+ +
+ +/* Look up a probe on probe_list by event name; NULL when absent. */
+ +static struct trace_probe *find_probe_event(const char *event)
+ +{
+ +      struct trace_probe *pos;
+ +
+ +      list_for_each_entry(pos, &probe_list, list) {
+ +              if (strcmp(pos->call.name, event) == 0)
+ +                      return pos;
+ +      }
+ +
+ +      return NULL;
+ +}
+ +
+ +/*
+ + * Tear down @tp: remove its k(ret)probe, unlink it from probe_list and
+ + * unregister its event.  The caller must hold probe_lock.
+ + */
+ +static void unregister_trace_probe(struct trace_probe *tp)
+ +{
+ +      if (!probe_is_return(tp))
+ +              unregister_kprobe(&tp->rp.kp);
+ +      else
+ +              unregister_kretprobe(&tp->rp);
+ +
+ +      list_del(&tp->list);
+ +      unregister_probe_event(tp);
+ +}
+ +
+ +/*
+ + * Register @tp's event and its k(ret)probe under probe_lock, then add
+ + * it to probe_list.  An existing probe with the same event name is
+ + * unregistered and freed first.  Returns 0 or a negative error code.
+ + */
+ +static int register_trace_probe(struct trace_probe *tp)
+ +{
+ +      struct trace_probe *stale;
+ +      int err;
+ +
+ +      mutex_lock(&probe_lock);
+ +
+ +      /* A same-named event is replaced by the new definition. */
+ +      stale = find_probe_event(tp->call.name);
+ +      if (stale) {
+ +              unregister_trace_probe(stale);
+ +              free_trace_probe(stale);
+ +      }
+ +
+ +      err = register_probe_event(tp);
+ +      if (err) {
+ +              pr_warning("Faild to register probe event(%d)\n", err);
+ +              goto unlock;
+ +      }
+ +
+ +      /* The probe is registered with the disabled flag set. */
+ +      tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
+ +      if (probe_is_return(tp))
+ +              err = register_kretprobe(&tp->rp);
+ +      else
+ +              err = register_kprobe(&tp->rp.kp);
+ +
+ +      if (!err) {
+ +              list_add_tail(&tp->list, &probe_list);
+ +              goto unlock;
+ +      }
+ +
+ +      pr_warning("Could not insert probe(%d)\n", err);
+ +      if (err == -EILSEQ) {
+ +              pr_warning("Probing address(0x%p) is not an "
+ +                         "instruction boundary.\n",
+ +                         tp->rp.kp.addr);
+ +              err = -EINVAL;
+ +      }
+ +      /* Roll back the event registered above. */
+ +      unregister_probe_event(tp);
+ +unlock:
+ +      mutex_unlock(&probe_lock);
+ +      return err;
+ +}
+ +
+ +/*
+ + * Split "SYMBOL+OFFSET" in place: on success @symbol is cut at the '+'
+ + * and *@offset receives the parsed number (0 when there is no '+').
+ + * Returns -EINVAL for a NULL @offset or the strict_strtoul() error.
+ + */
+ +static int split_symbol_offset(char *symbol, unsigned long *offset)
+ +{
+ +      char *plus;
+ +      int err;
+ +
+ +      if (!offset)
+ +              return -EINVAL;
+ +
+ +      plus = strchr(symbol, '+');
+ +      if (!plus) {
+ +              *offset = 0;
+ +              return 0;
+ +      }
+ +
+ +      /* Parse what follows the '+'; the sign itself is skipped. */
+ +      err = strict_strtoul(plus + 1, 0, offset);
+ +      if (err)
+ +              return err;
+ +
+ +      /* Only terminate the symbol once the offset parsed cleanly. */
+ +      *plus = '\0';
+ +      return 0;
+ +}
+ +
+ +#define PARAM_MAX_ARGS 16
+ +#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
+ +
+ +/*
+ + * Parse one fetch-argument string @arg and fill in @ff accordingly.
+ + *
+ + * @arg is modified in place: the "SYM+offs" and "+|-offs(ARG)" forms get
+ + * NUL terminators written over their separators.  @is_return enables
+ + * the "rv" and "ra" forms.  Indirect arguments are parsed recursively
+ + * and allocate an indirect_fetch_data; symbol arguments allocate a
+ + * symbol_cache.
+ + *
+ + * Returns 0 on success or a negative error code.
+ + */
+ +static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
+ +{
+ +      int ret = 0;
+ +      unsigned long param;
+ +      long offset;
+ +      char *tmp;
+ +
+ +      switch (arg[0]) {
+ +      case 'a':       /* argument */
+ +              ret = strict_strtoul(arg + 1, 10, &param);
+ +              if (ret || param > PARAM_MAX_ARGS)
+ +                      ret = -EINVAL;
+ +              else {
+ +                      ff->func = fetch_argument;
+ +                      ff->data = (void *)param;
+ +              }
+ +              break;
+ +      case 'r':       /* retval or retaddr */
+ +              if (is_return && arg[1] == 'v') {
+ +                      ff->func = fetch_retvalue;
+ +                      ff->data = NULL;
+ +              } else if (is_return && arg[1] == 'a') {
+ +                      ff->func = fetch_ip;
+ +                      ff->data = NULL;
+ +              } else
+ +                      ret = -EINVAL;
+ +              break;
+ +      case '%':       /* named register */
+ +              ret = regs_query_register_offset(arg + 1);
+ +              if (ret >= 0) {
+ +                      ff->func = fetch_register;
+ +                      ff->data = (void *)(unsigned long)ret;
+ +                      ret = 0;
+ +              }
+ +              break;
+ +      case 's':       /* stack */
+ +              if (arg[1] == 'a') {
+ +                      ff->func = fetch_stack_address;
+ +                      ff->data = NULL;
+ +              } else {
+ +                      ret = strict_strtoul(arg + 1, 10, &param);
+ +                      if (ret || param > PARAM_MAX_STACK)
+ +                              ret = -EINVAL;
+ +                      else {
+ +                              ff->func = fetch_stack;
+ +                              ff->data = (void *)param;
+ +                      }
+ +              }
+ +              break;
+ +      case '@':       /* memory or symbol */
+ +              if (isdigit(arg[1])) {
+ +                      ret = strict_strtoul(arg + 1, 0, &param);
+ +                      if (ret)
+ +                              break;
+ +                      ff->func = fetch_memory;
+ +                      ff->data = (void *)param;
+ +              } else {
+ +                      /*
+ +                       * split_symbol_offset() takes an unsigned long *,
+ +                       * so parse into @param instead of passing the
+ +                       * address of the signed @offset.
+ +                       */
+ +                      ret = split_symbol_offset(arg + 1, &param);
+ +                      if (ret)
+ +                              break;
+ +                      offset = (long)param;
+ +                      ff->data = alloc_symbol_cache(arg + 1,
+ +                                                            offset);
+ +                      if (ff->data)
+ +                              ff->func = fetch_symbol;
+ +                      else
+ +                              ret = -EINVAL;
+ +              }
+ +              break;
+ +      case '+':       /* indirect memory */
+ +      case '-':
+ +              tmp = strchr(arg, '(');
+ +              if (!tmp) {
+ +                      ret = -EINVAL;
+ +                      break;
+ +              }
+ +              *tmp = '\0';
+ +              /* The sign is skipped here and applied by hand below. */
+ +              ret = strict_strtol(arg + 1, 0, &offset);
+ +              if (ret)
+ +                      break;
+ +              if (arg[0] == '-')
+ +                      offset = -offset;
+ +              arg = tmp + 1;
+ +              /* The last ')' closes this level; inner ones stay nested. */
+ +              tmp = strrchr(arg, ')');
+ +              if (tmp) {
+ +                      struct indirect_fetch_data *id;
+ +                      *tmp = '\0';
+ +                      id = kzalloc(sizeof(struct indirect_fetch_data),
+ +                                   GFP_KERNEL);
+ +                      if (!id)
+ +                              return -ENOMEM;
+ +                      id->offset = offset;
+ +                      ret = parse_probe_arg(arg, &id->orig, is_return);
+ +                      if (ret)
+ +                              kfree(id);
+ +                      else {
+ +                              ff->func = fetch_indirect;
+ +                              ff->data = (void *)id;
+ +                      }
+ +              } else
+ +                      ret = -EINVAL;
+ +              break;
+ +      default:
+ +              /* TODO: support custom handler */
+ +              ret = -EINVAL;
+ +      }
+ +      return ret;
+ +}
+ +
+ +/*
+ + * Parse one probe definition (already split into @argc words in @argv)
+ + * and register the resulting probe.  The words are modified in place
+ + * while parsing.  Returns 0 on success or a negative error code.
+ + */
+ +static int create_trace_probe(int argc, char **argv)
+ +{
+ +      /*
+ +       * Argument syntax:
+ +       *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
+ +       *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
+ +       * Fetch args:
+ +       *  aN  : fetch Nth of function argument. (N:0-)
+ +       *  rv  : fetch return value
+ +       *  ra  : fetch return address
+ +       *  sa  : fetch stack address
+ +       *  sN  : fetch Nth of stack (N:0-)
+ +       *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
+ +       *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
+ +       *  %REG        : fetch register REG
+ +       * Indirect memory fetch:
+ +       *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
+ +       * Alias name of args:
+ +       *  NAME=FETCHARG : set NAME as alias of FETCHARG.
+ +       */
+ +      struct trace_probe *tp;
+ +      int i, ret = 0;
+ +      int is_return = 0;
+ +      char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
+ +      unsigned long offset = 0;
+ +      void *addr = NULL;
+ +      char buf[MAX_EVENT_NAME_LEN];
+ +
+ +      if (argc < 2)
+ +              return -EINVAL;
+ +
+ +      if (argv[0][0] == 'p')
+ +              is_return = 0;
+ +      else if (argv[0][0] == 'r')
+ +              is_return = 1;
+ +      else
+ +              return -EINVAL;
+ +
+ +      if (argv[0][1] == ':') {
+ +              event = &argv[0][2];
+ +              if (strchr(event, '/')) {
+ +                      group = event;
+ +                      event = strchr(group, '/') + 1;
+ +                      event[-1] = '\0';
+ +                      if (strlen(group) == 0) {
+ +                              pr_info("Group name is not specifiled\n");
+ +                              return -EINVAL;
+ +                      }
+ +              }
+ +              if (strlen(event) == 0) {
+ +                      pr_info("Event name is not specifiled\n");
+ +                      return -EINVAL;
+ +              }
+ +      }
+ +
+ +      if (isdigit(argv[1][0])) {
+ +              if (is_return)
+ +                      return -EINVAL;
+ +              /*
+ +               * an address specified: it lives in the second word
+ +               * (argv[1]), not in the event-name part of argv[0].
+ +               */
+ +              ret = strict_strtoul(argv[1], 0, (unsigned long *)&addr);
+ +              if (ret)
+ +                      return ret;
+ +      } else {
+ +              /* a symbol specified */
+ +              symbol = argv[1];
+ +              /* TODO: support .init module functions */
+ +              ret = split_symbol_offset(symbol, &offset);
+ +              if (ret)
+ +                      return ret;
+ +              if (offset && is_return)
+ +                      return -EINVAL;
+ +      }
+ +      argc -= 2; argv += 2;
+ +
+ +      /* setup a probe */
+ +      if (!group)
+ +              group = KPROBE_EVENT_SYSTEM;
+ +      if (!event) {
+ +              /* Make a new event name */
+ +              if (symbol)
+ +                      snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
+ +                               is_return ? 'r' : 'p', symbol, offset);
+ +              else
+ +                      snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
+ +                               is_return ? 'r' : 'p', addr);
+ +              event = buf;
+ +      }
+ +      tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
+ +                             is_return);
+ +      if (IS_ERR(tp))
+ +              return PTR_ERR(tp);
+ +
+ +      /* parse arguments */
+ +      ret = 0;
+ +      for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ +              /*
+ +               * Count the slot before filling it so that the error path
+ +               * (free_trace_probe) releases everything parsed so far.
+ +               * The slot was zeroed by kzalloc(), so freeing a partly
+ +               * initialised entry is safe.
+ +               */
+ +              tp->nr_args++;
+ +
+ +              /* Parse argument name */
+ +              arg = strchr(argv[i], '=');
+ +              if (arg)
+ +                      *arg++ = '\0';
+ +              else
+ +                      arg = argv[i];
+ +              tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+ +              if (!tp->args[i].name) {
+ +                      ret = -ENOMEM;
+ +                      goto error;
+ +              }
+ +
+ +              /* Parse fetch argument */
+ +              if (strlen(arg) > MAX_ARGSTR_LEN) {
+ +                      pr_info("Argument%d(%s) is too long.\n", i, arg);
+ +                      ret = -ENOSPC;
+ +                      goto error;
+ +              }
+ +              ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
+ +              if (ret)
+ +                      goto error;
+ +      }
+ +
+ +      ret = register_trace_probe(tp);
+ +      if (ret)
+ +              goto error;
+ +      return 0;
+ +
+ +error:
+ +      free_trace_probe(tp);
+ +      return ret;
+ +}
+ +
+ +/* Unregister and free every probe on probe_list, under probe_lock. */
+ +static void cleanup_all_probes(void)
+ +{
+ +      mutex_lock(&probe_lock);
+ +      /* TODO: Use batch unregistration */
+ +      while (!list_empty(&probe_list)) {
+ +              struct trace_probe *head;
+ +
+ +              /* unregister_trace_probe() unlinks head from the list. */
+ +              head = list_entry(probe_list.next, struct trace_probe, list);
+ +              unregister_trace_probe(head);
+ +              free_trace_probe(head);
+ +      }
+ +      mutex_unlock(&probe_lock);
+ +}
+ +
+ +
+ +/* Probes listing interfaces */
+ +/* seq_file start: take probe_lock and position at entry *@pos. */
+ +static void *probes_seq_start(struct seq_file *m, loff_t *pos)
+ +{
+ +      void *first;
+ +
+ +      mutex_lock(&probe_lock);
+ +      first = seq_list_start(&probe_list, *pos);
+ +
+ +      return first;
+ +}
+ +
+ +/* seq_file next: advance to the entry after @v on probe_list. */
+ +static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
+ +{
+ +      void *following = seq_list_next(v, &probe_list, pos);
+ +
+ +      return following;
+ +}
+ +
+ +/* seq_file stop: release the probe_lock taken by probes_seq_start(). */
+ +static void probes_seq_stop(struct seq_file *m, void *v)
+ +{
+ +      mutex_unlock(&probe_lock);
+ +      return;
+ +}
+ +
+ +/*
+ + * seq_file show: print one probe definition as
+ + * "p|r:EVENT SYMBOL+OFFS|0xADDR [NAME=FETCHARG ...]".
+ + * Returns 0, or the error from probe_arg_string() when an argument
+ + * cannot be rendered.
+ + */
+ +static int probes_seq_show(struct seq_file *m, void *v)
+ +{
+ +      struct trace_probe *tp = v;
+ +      int i, ret;
+ +      char buf[MAX_ARGSTR_LEN + 1];
+ +
+ +      seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
+ +      seq_printf(m, ":%s", tp->call.name);
+ +
+ +      if (tp->symbol)
+ +              seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
+ +      else
+ +              seq_printf(m, " 0x%p", tp->rp.kp.addr);
+ +
+ +      for (i = 0; i < tp->nr_args; i++) {
+ +              /*
+ +               * Pass the real buffer size: buf holds MAX_ARGSTR_LEN
+ +               * characters plus the NUL, so a maximum-length argument
+ +               * must not be rejected with -ENOSPC.
+ +               */
+ +              ret = probe_arg_string(buf, sizeof(buf), &tp->args[i].fetch);
+ +              if (ret < 0) {
+ +                      pr_warning("Argument%d decoding error(%d).\n", i, ret);
+ +                      return ret;
+ +              }
+ +              seq_printf(m, " %s=%s", tp->args[i].name, buf);
+ +      }
+ +      seq_printf(m, "\n");
+ +      return 0;
+ +}
+ +
+ +static const struct seq_operations probes_seq_op = {
+ +      .start  = probes_seq_start,     /* takes probe_lock */
+ +      .next   = probes_seq_next,
+ +      .stop   = probes_seq_stop,      /* releases probe_lock */
+ +      .show   = probes_seq_show       /* prints one probe definition */
+ +};
+ +
+ +/*
+ + * Open handler for the probe list file.  Opening it for writing with
+ + * O_TRUNC removes all existing probes before the seq_file is set up.
+ + */
+ +static int probes_open(struct inode *inode, struct file *file)
+ +{
+ +      int truncating = (file->f_mode & FMODE_WRITE) &&
+ +                       (file->f_flags & O_TRUNC);
+ +
+ +      if (truncating)
+ +              cleanup_all_probes();
+ +
+ +      return seq_open(file, &probes_seq_op);
+ +}
+ +
+ +/*
+ + * Split the command line @buf into words and create a probe from it.
+ + * An empty line is accepted and does nothing.  Returns 0, -ENOMEM when
+ + * the split fails, or the error from create_trace_probe().
+ + */
+ +static int command_trace_probe(const char *buf)
+ +{
+ +      int nr_words = 0;
+ +      int err = 0;
+ +      char **words;
+ +
+ +      words = argv_split(GFP_KERNEL, buf, &nr_words);
+ +      if (!words)
+ +              return -ENOMEM;
+ +
+ +      if (nr_words != 0)
+ +              err = create_trace_probe(nr_words, words);
+ +
+ +      argv_free(words);
+ +      return err;
+ +}
+ +
+ +#define WRITE_BUFSIZE 128
+ +
+ +/*
+ + * Write handler for the probe list file.
+ + *
+ + * The user buffer is consumed one line at a time (each line shorter than
+ + * WRITE_BUFSIZE); anything after a '#' is dropped as a comment and the
+ + * rest is handed to command_trace_probe().  Returns the number of bytes
+ + * consumed, or a negative error code from the first failing line.
+ + */
+ +static ssize_t probes_write(struct file *file, const char __user *buffer,
+ +                          size_t count, loff_t *ppos)
+ +{
+ +      char *kbuf, *tmp;
+ +      ssize_t ret;    /* ssize_t so a large byte count is not truncated */
+ +      size_t done;
+ +      size_t size;
+ +
+ +      kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
+ +      if (!kbuf)
+ +              return -ENOMEM;
+ +
+ +      ret = 0;
+ +      done = 0;
+ +      while (done < count) {
+ +              size = count - done;
+ +              if (size >= WRITE_BUFSIZE)
+ +                      size = WRITE_BUFSIZE - 1;
+ +              if (copy_from_user(kbuf, buffer + done, size)) {
+ +                      ret = -EFAULT;
+ +                      goto out;
+ +              }
+ +              kbuf[size] = '\0';
+ +              tmp = strchr(kbuf, '\n');
+ +              if (tmp) {
+ +                      /* Consume this line only, including its newline. */
+ +                      *tmp = '\0';
+ +                      size = tmp - kbuf + 1;
+ +              } else if (done + size < count) {
+ +                      /* No newline in a full buffer and more input follows. */
+ +                      pr_warning("Line length is too long: "
+ +                                 "Should be less than %d.\n", WRITE_BUFSIZE);
+ +                      ret = -EINVAL;
+ +                      goto out;
+ +              }
+ +              done += size;
+ +              /* Remove comments */
+ +              tmp = strchr(kbuf, '#');
+ +              if (tmp)
+ +                      *tmp = '\0';
+ +
+ +              ret = command_trace_probe(kbuf);
+ +              if (ret)
+ +                      goto out;
+ +      }
+ +      ret = done;
+ +out:
+ +      kfree(kbuf);
+ +      return ret;
+ +}
+ +
+ +static const struct file_operations kprobe_events_ops = {
+ +      .owner          = THIS_MODULE,
+ +      .open           = probes_open,
+ +      .read           = seq_read,
+ +      .llseek         = seq_lseek,
+ +      .release        = seq_release,
+ +      .write          = probes_write,
+ +};
+ +
+ +/* Probes profiling interfaces */
+ +static int probes_profile_seq_show(struct seq_file *m, void *v)
+ +{
+ +      struct trace_probe *tp = v;
+ +
+ +      seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
+ +                 tp->rp.kp.nmissed);
+ +
+ +      return 0;
+ +}
+ +
+ +static const struct seq_operations profile_seq_op = {
+ +      .start  = probes_seq_start,
+ +      .next   = probes_seq_next,
+ +      .stop   = probes_seq_stop,
+ +      .show   = probes_profile_seq_show
+ +};
+ +
+ +static int profile_open(struct inode *inode, struct file *file)
+ +{
+ +      return seq_open(file, &profile_seq_op);
+ +}
+ +
+ +static const struct file_operations kprobe_profile_ops = {
+ +      .owner          = THIS_MODULE,
+ +      .open           = profile_open,
+ +      .read           = seq_read,
+ +      .llseek         = seq_lseek,
+ +      .release        = seq_release,
+ +};
+ +
+ +/* Kprobe handler */
+ +static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
+ +{
+ +      struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+ +      struct kprobe_trace_entry *entry;
+ +      struct ring_buffer_event *event;
+ +      struct ring_buffer *buffer;
+ +      int size, i, pc;
+ +      unsigned long irq_flags;
+ +      struct ftrace_event_call *call = &tp->call;
+ +
+ +      tp->nhit++;
+ +
+ +      local_save_flags(irq_flags);
+ +      pc = preempt_count();
+ +
+ +      size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+ +
+ +      event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
+ +                                                irq_flags, pc);
+ +      if (!event)
+ +              return 0;
+ +
+ +      entry = ring_buffer_event_data(event);
+ +      entry->nargs = tp->nr_args;
+ +      entry->ip = (unsigned long)kp->addr;
+ +      for (i = 0; i < tp->nr_args; i++)
+ +              entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
+ +
+ +      if (!filter_current_check_discard(buffer, call, entry, event))
+ +              trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+ +      return 0;
+ +}
+ +
+ +/* Kretprobe handler */
+ +static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
+ +                                        struct pt_regs *regs)
+ +{
+ +      struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+ +      struct kretprobe_trace_entry *entry;
+ +      struct ring_buffer_event *event;
+ +      struct ring_buffer *buffer;
+ +      int size, i, pc;
+ +      unsigned long irq_flags;
+ +      struct ftrace_event_call *call = &tp->call;
+ +
+ +      local_save_flags(irq_flags);
+ +      pc = preempt_count();
+ +
+ +      size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+ +
+ +      event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
+ +                                                irq_flags, pc);
+ +      if (!event)
+ +              return 0;
+ +
+ +      entry = ring_buffer_event_data(event);
+ +      entry->nargs = tp->nr_args;
+ +      entry->func = (unsigned long)tp->rp.kp.addr;
+ +      entry->ret_ip = (unsigned long)ri->ret_addr;
+ +      for (i = 0; i < tp->nr_args; i++)
+ +              entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
+ +
+ +      if (!filter_current_check_discard(buffer, call, entry, event))
+ +              trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+ +
+ +      return 0;
+ +}
+ +
+ +/* Event entry printers */
+ +enum print_line_t
+ +print_kprobe_event(struct trace_iterator *iter, int flags)
+ +{
+ +      struct kprobe_trace_entry *field;
+ +      struct trace_seq *s = &iter->seq;
+ +      struct trace_event *event;
+ +      struct trace_probe *tp;
+ +      int i;
+ +
+ +      field = (struct kprobe_trace_entry *)iter->ent;
+ +      event = ftrace_find_event(field->ent.type);
+ +      tp = container_of(event, struct trace_probe, event);
+ +
+ +      if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ +              goto partial;
+ +
+ +      if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+ +              goto partial;
+ +
+ +      if (!trace_seq_puts(s, ")"))
+ +              goto partial;
+ +
+ +      for (i = 0; i < field->nargs; i++)
+ +              if (!trace_seq_printf(s, " %s=%lx",
+ +                                    tp->args[i].name, field->args[i]))
+ +                      goto partial;
+ +
+ +      if (!trace_seq_puts(s, "\n"))
+ +              goto partial;
+ +
+ +      return TRACE_TYPE_HANDLED;
+ +partial:
+ +      return TRACE_TYPE_PARTIAL_LINE;
+ +}
+ +
+ +enum print_line_t
+ +print_kretprobe_event(struct trace_iterator *iter, int flags)
+ +{
+ +      struct kretprobe_trace_entry *field;
+ +      struct trace_seq *s = &iter->seq;
+ +      struct trace_event *event;
+ +      struct trace_probe *tp;
+ +      int i;
+ +
+ +      field = (struct kretprobe_trace_entry *)iter->ent;
+ +      event = ftrace_find_event(field->ent.type);
+ +      tp = container_of(event, struct trace_probe, event);
+ +
+ +      if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ +              goto partial;
+ +
+ +      if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
+ +              goto partial;
+ +
+ +      if (!trace_seq_puts(s, " <- "))
+ +              goto partial;
+ +
+ +      if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
+ +              goto partial;
+ +
+ +      if (!trace_seq_puts(s, ")"))
+ +              goto partial;
+ +
+ +      for (i = 0; i < field->nargs; i++)
+ +              if (!trace_seq_printf(s, " %s=%lx",
+ +                                    tp->args[i].name, field->args[i]))
+ +                      goto partial;
+ +
+ +      if (!trace_seq_puts(s, "\n"))
+ +              goto partial;
+ +
+ +      return TRACE_TYPE_HANDLED;
+ +partial:
+ +      return TRACE_TYPE_PARTIAL_LINE;
+ +}
+ +
+ +static int probe_event_enable(struct ftrace_event_call *call)
+ +{
+ +      struct trace_probe *tp = (struct trace_probe *)call->data;
+ +
+ +      tp->flags |= TP_FLAG_TRACE;
+ +      if (probe_is_return(tp))
+ +              return enable_kretprobe(&tp->rp);
+ +      else
+ +              return enable_kprobe(&tp->rp.kp);
+ +}
+ +
+ +static void probe_event_disable(struct ftrace_event_call *call)
+ +{
+ +      struct trace_probe *tp = (struct trace_probe *)call->data;
+ +
+ +      tp->flags &= ~TP_FLAG_TRACE;
+ +      if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
+ +              if (probe_is_return(tp))
+ +                      disable_kretprobe(&tp->rp);
+ +              else
+ +                      disable_kprobe(&tp->rp.kp);
+ +      }
+ +}
+ +
+ +static int probe_event_raw_init(struct ftrace_event_call *event_call)
+ +{
+ +      INIT_LIST_HEAD(&event_call->fields);
+ +
+ +      return 0;
+ +}
+ +
+ +#undef DEFINE_FIELD
+ +#define DEFINE_FIELD(type, item, name, is_signed)                     \
+ +      do {                                                            \
+ +              ret = trace_define_field(event_call, #type, name,       \
+ +                                       offsetof(typeof(field), item), \
+ +                                       sizeof(field.item), is_signed, \
+ +                                       FILTER_OTHER);                 \
+ +              if (ret)                                                \
+ +                      return ret;                                     \
+ +      } while (0)
+ +
+ +/*
+ + * Define the fields of a kprobe event: the common fields, then "ip" and
+ + * "nargs", then one unsigned long field per probe argument, named after
+ + * the argument.
+ + *
+ + * Returns 0 on success, or the first non-zero value returned while
+ + * defining a field.
+ + */
+ +static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
+ +{
+ +      int ret, i;
+ +      struct kprobe_trace_entry field;
+ +      struct trace_probe *tp = (struct trace_probe *)event_call->data;
+ +
+ +      /* Bail out only on failure; success must go on to the own fields */
+ +      ret = trace_define_common_fields(event_call);
+ +      if (ret)
+ +              return ret;
+ +
+ +      DEFINE_FIELD(unsigned long, ip, "ip", 0);
+ +      DEFINE_FIELD(int, nargs, "nargs", 1);
+ +      /* Set argument names as fields */
+ +      for (i = 0; i < tp->nr_args; i++)
+ +              DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Define the fields of a kretprobe event: the common fields, then "func",
+ + * "ret_ip" and "nargs", then one unsigned long field per probe argument,
+ + * named after the argument.
+ + *
+ + * Returns 0 on success, or the first non-zero value returned while
+ + * defining a field.
+ + */
+ +static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
+ +{
+ +      int ret, i;
+ +      struct kretprobe_trace_entry field;
+ +      struct trace_probe *tp = (struct trace_probe *)event_call->data;
+ +
+ +      /* Bail out only on failure; success must go on to the own fields */
+ +      ret = trace_define_common_fields(event_call);
+ +      if (ret)
+ +              return ret;
+ +
+ +      DEFINE_FIELD(unsigned long, func, "func", 0);
+ +      DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
+ +      DEFINE_FIELD(int, nargs, "nargs", 1);
+ +      /* Set argument names as fields */
+ +      for (i = 0; i < tp->nr_args; i++)
+ +              DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
+ +      return 0;
+ +}
+ +
+ +static int __probe_event_show_format(struct trace_seq *s,
+ +                                   struct trace_probe *tp, const char *fmt,
+ +                                   const char *arg)
+ +{
+ +      int i;
+ +
+ +      /* Show format */
+ +      if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
+ +              return 0;
+ +
+ +      for (i = 0; i < tp->nr_args; i++)
+ +              if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
+ +                      return 0;
+ +
+ +      if (!trace_seq_printf(s, "\", %s", arg))
+ +              return 0;
+ +
+ +      for (i = 0; i < tp->nr_args; i++)
+ +              if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
+ +                      return 0;
+ +
+ +      return trace_seq_puts(s, "\n");
+ +}
+ +
+ +#undef SHOW_FIELD
+ +#define SHOW_FIELD(type, item, name)                                  \
+ +      do {                                                            \
+ +              ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"    \
+ +                              "offset:%u;\tsize:%u;\n", name,         \
+ +                              (unsigned int)offsetof(typeof(field), item),\
+ +                              (unsigned int)sizeof(type));            \
+ +              if (!ret)                                               \
+ +                      return 0;                                       \
+ +      } while (0)
+ +
+ +static int kprobe_event_show_format(struct ftrace_event_call *call,
+ +                                  struct trace_seq *s)
+ +{
+ +      struct kprobe_trace_entry field __attribute__((unused));
+ +      int ret, i;
+ +      struct trace_probe *tp = (struct trace_probe *)call->data;
+ +
+ +      SHOW_FIELD(unsigned long, ip, "ip");
+ +      SHOW_FIELD(int, nargs, "nargs");
+ +
+ +      /* Show fields */
+ +      for (i = 0; i < tp->nr_args; i++)
+ +              SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
+ +      trace_seq_puts(s, "\n");
+ +
+ +      return __probe_event_show_format(s, tp, "(%lx)", "REC->ip");
+ +}
+ +
+ +static int kretprobe_event_show_format(struct ftrace_event_call *call,
+ +                                     struct trace_seq *s)
+ +{
+ +      struct kretprobe_trace_entry field __attribute__((unused));
+ +      int ret, i;
+ +      struct trace_probe *tp = (struct trace_probe *)call->data;
+ +
+ +      SHOW_FIELD(unsigned long, func, "func");
+ +      SHOW_FIELD(unsigned long, ret_ip, "ret_ip");
+ +      SHOW_FIELD(int, nargs, "nargs");
+ +
+ +      /* Show fields */
+ +      for (i = 0; i < tp->nr_args; i++)
+ +              SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
+ +      trace_seq_puts(s, "\n");
+ +
+ +      return __probe_event_show_format(s, tp, "(%lx <- %lx)",
+ +                                        "REC->func, REC->ret_ip");
+ +}
+ +
+ +#ifdef CONFIG_EVENT_PROFILE
+ +
+ +/* Kprobe profile handler */
+ +static __kprobes int kprobe_profile_func(struct kprobe *kp,
+ +                                       struct pt_regs *regs)
+ +{
+ +      struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+ +      struct ftrace_event_call *call = &tp->call;
+ +      struct kprobe_trace_entry *entry;
+ +      int size, __size, i, pc;
+ +      unsigned long irq_flags;
+ +
+ +      local_save_flags(irq_flags);
+ +      pc = preempt_count();
+ +
+ +      __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+ +      size = ALIGN(__size + sizeof(u32), sizeof(u64));
+ +      size -= sizeof(u32);
+ +
+ +      do {
+ +              char raw_data[size];
+ +              struct trace_entry *ent;
+ +              /*
+ +               * Zero dead bytes from alignment to avoid stack leak
+ +               * to userspace
+ +               */
+ +              *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ +              entry = (struct kprobe_trace_entry *)raw_data;
+ +              ent = &entry->ent;
+ +
+ +              tracing_generic_entry_update(ent, irq_flags, pc);
+ +              ent->type = call->id;
+ +              entry->nargs = tp->nr_args;
+ +              entry->ip = (unsigned long)kp->addr;
+ +              for (i = 0; i < tp->nr_args; i++)
+ +                      entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
-               perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size);
++              perf_tp_event(call->id, entry->ip, 1, entry, size);
+ +      } while (0);
+ +      return 0;
+ +}
+ +
+ +/* Kretprobe profile handler */
+ +static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
+ +                                          struct pt_regs *regs)
+ +{
+ +      struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+ +      struct ftrace_event_call *call = &tp->call;
+ +      struct kretprobe_trace_entry *entry;
+ +      int size, __size, i, pc;
+ +      unsigned long irq_flags;
+ +
+ +      local_save_flags(irq_flags);
+ +      pc = preempt_count();
+ +
+ +      __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+ +      size = ALIGN(__size + sizeof(u32), sizeof(u64));
+ +      size -= sizeof(u32);
+ +
+ +      do {
+ +              char raw_data[size];
+ +              struct trace_entry *ent;
+ +
+ +              *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ +              entry = (struct kretprobe_trace_entry *)raw_data;
+ +              ent = &entry->ent;
+ +
+ +              tracing_generic_entry_update(ent, irq_flags, pc);
+ +              ent->type = call->id;
+ +              entry->nargs = tp->nr_args;
+ +              entry->func = (unsigned long)tp->rp.kp.addr;
+ +              entry->ret_ip = (unsigned long)ri->ret_addr;
+ +              for (i = 0; i < tp->nr_args; i++)
+ +                      entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
-       if (atomic_inc_return(&call->profile_count))
-               return 0;
- 
++              perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
+ +      } while (0);
+ +      return 0;
+ +}
+ +
+ +static int probe_profile_enable(struct ftrace_event_call *call)
+ +{
+ +      struct trace_probe *tp = (struct trace_probe *)call->data;
+ +
-       if (atomic_add_negative(-1, &call->profile_count))
-               tp->flags &= ~TP_FLAG_PROFILE;
+ +      tp->flags |= TP_FLAG_PROFILE;
++
+ +      if (probe_is_return(tp))
+ +              return enable_kretprobe(&tp->rp);
+ +      else
+ +              return enable_kprobe(&tp->rp.kp);
+ +}
+ +
+ +static void probe_profile_disable(struct ftrace_event_call *call)
+ +{
+ +      struct trace_probe *tp = (struct trace_probe *)call->data;
+ +
-       if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
++      tp->flags &= ~TP_FLAG_PROFILE;
+ +
++      if (!(tp->flags & TP_FLAG_TRACE)) {
+ +              if (probe_is_return(tp))
+ +                      disable_kretprobe(&tp->rp);
+ +              else
+ +                      disable_kprobe(&tp->rp.kp);
+ +      }
+ +}
+ +#endif        /* CONFIG_EVENT_PROFILE */
+ +
+ +
+ +/*
+ + * Kprobe handler that fans a hit out to the enabled consumers: the trace
+ + * handler when TP_FLAG_TRACE is set and, if CONFIG_EVENT_PROFILE is
+ + * defined, the profile handler when TP_FLAG_PROFILE is set.
+ + * The return values of both handlers are ignored.
+ + */
+ +static __kprobes
+ +int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
+ +{
+ +      struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+ +
+ +      if (tp->flags & TP_FLAG_TRACE)
+ +              kprobe_trace_func(kp, regs);
+ +#ifdef CONFIG_EVENT_PROFILE
+ +      if (tp->flags & TP_FLAG_PROFILE)
+ +              kprobe_profile_func(kp, regs);
+ +#endif        /* CONFIG_EVENT_PROFILE */
+ +      return 0;       /* We don't tweak kernel, so just return 0 */
+ +}
+ +
+ +/*
+ + * Kretprobe handler that fans a function return out to the enabled
+ + * consumers: the trace handler when TP_FLAG_TRACE is set and, if
+ + * CONFIG_EVENT_PROFILE is defined, the profile handler when
+ + * TP_FLAG_PROFILE is set. The return values of both handlers are ignored.
+ + */
+ +static __kprobes
+ +int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+ +{
+ +      struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+ +
+ +      if (tp->flags & TP_FLAG_TRACE)
+ +              kretprobe_trace_func(ri, regs);
+ +#ifdef CONFIG_EVENT_PROFILE
+ +      if (tp->flags & TP_FLAG_PROFILE)
+ +              kretprobe_profile_func(ri, regs);
+ +#endif        /* CONFIG_EVENT_PROFILE */
+ +      return 0;       /* We don't tweak kernel, so just return 0 */
+ +}
+ +
+ +static int register_probe_event(struct trace_probe *tp)
+ +{
+ +      struct ftrace_event_call *call = &tp->call;
+ +      int ret;
+ +
+ +      /* Initialize ftrace_event_call */
+ +      if (probe_is_return(tp)) {
+ +              tp->event.trace = print_kretprobe_event;
+ +              call->raw_init = probe_event_raw_init;
+ +              call->show_format = kretprobe_event_show_format;
+ +              call->define_fields = kretprobe_event_define_fields;
+ +      } else {
+ +              tp->event.trace = print_kprobe_event;
+ +              call->raw_init = probe_event_raw_init;
+ +              call->show_format = kprobe_event_show_format;
+ +              call->define_fields = kprobe_event_define_fields;
+ +      }
+ +      call->event = &tp->event;
+ +      call->id = register_ftrace_event(&tp->event);
+ +      if (!call->id)
+ +              return -ENODEV;
+ +      call->enabled = 0;
+ +      call->regfunc = probe_event_enable;
+ +      call->unregfunc = probe_event_disable;
+ +
+ +#ifdef CONFIG_EVENT_PROFILE
+ +      atomic_set(&call->profile_count, -1);
+ +      call->profile_enable = probe_profile_enable;
+ +      call->profile_disable = probe_profile_disable;
+ +#endif
+ +      call->data = tp;
+ +      ret = trace_add_event_call(call);
+ +      if (ret) {
+ +              pr_info("Failed to register kprobe event: %s\n", call->name);
+ +              unregister_ftrace_event(&tp->event);
+ +      }
+ +      return ret;
+ +}
+ +
+ +static void unregister_probe_event(struct trace_probe *tp)
+ +{
+ +      /* tp->event is unregistered in trace_remove_event_call() */
+ +      trace_remove_event_call(&tp->call);
+ +}
+ +
+ +/*
+ + * Make a debugfs interface for controlling probe points.
+ + *
+ + * Creates two files under the tracing directory: "kprobe_events" (mode
+ + * 0644, backed by kprobe_events_ops) and "kprobe_profile" (mode 0444,
+ + * backed by kprobe_profile_ops). A file that cannot be created is only
+ + * reported with a warning; the function always returns 0.
+ + */
+ +static __init int init_kprobe_trace(void)
+ +{
+ +      struct dentry *d_tracer;
+ +      struct dentry *entry;
+ +
+ +      /* Without the tracing directory there is nowhere to put the files */
+ +      d_tracer = tracing_init_dentry();
+ +      if (!d_tracer)
+ +              return 0;
+ +
+ +      entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
+ +                                  NULL, &kprobe_events_ops);
+ +
+ +      /* Event list interface */
+ +      if (!entry)
+ +              pr_warning("Could not create debugfs "
+ +                         "'kprobe_events' entry\n");
+ +
+ +      /* Profile interface */
+ +      entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
+ +                                  NULL, &kprobe_profile_ops);
+ +
+ +      if (!entry)
+ +              pr_warning("Could not create debugfs "
+ +                         "'kprobe_profile' entry\n");
+ +      return 0;
+ +}
+ +fs_initcall(init_kprobe_trace);
+ +
+ +
+ +#ifdef CONFIG_FTRACE_STARTUP_TEST
+ +
+ +static int kprobe_trace_selftest_target(int a1, int a2, int a3,
+ +                                      int a4, int a5, int a6)
+ +{
+ +      return a1 + a2 + a3 + a4 + a5 + a6;
+ +}
+ +
+ +/*
+ + * Boot-time self test for kprobe tracing.
+ + *
+ + * Defines an entry probe and a return probe on
+ + * kprobe_trace_selftest_target() through command_trace_probe(), calls the
+ + * target once, then removes all probes with cleanup_all_probes().
+ + * Each failed step is reported with a warning and counted, and the final
+ + * status line reflects whether any step failed. Always returns 0.
+ + */
+ +static __init int kprobe_trace_self_tests_init(void)
+ +{
+ +      int ret, warn = 0;
+ +      int (*target)(int, int, int, int, int, int);
+ +
+ +      /*
+ +       * NOTE(review): the target is called through a pointer, presumably
+ +       * so the call is not inlined or optimized away -- confirm.
+ +       */
+ +      target = kprobe_trace_selftest_target;
+ +
+ +      pr_info("Testing kprobe tracing: ");
+ +
+ +      ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
+ +                                "a1 a2 a3 a4 a5 a6");
+ +      if (WARN_ON_ONCE(ret)) {
+ +              pr_warning("error enabling function entry\n");
+ +              warn++;
+ +      }
+ +
+ +      ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
+ +                                "ra rv");
+ +      if (WARN_ON_ONCE(ret)) {
+ +              pr_warning("error enabling function return\n");
+ +              warn++;
+ +      }
+ +
+ +      ret = target(1, 2, 3, 4, 5, 6);
+ +
+ +      cleanup_all_probes();
+ +
+ +      /* Do not claim success when a probe could not be set up */
+ +      if (warn)
+ +              pr_cont("FAILED\n");
+ +      else
+ +              pr_cont("OK\n");
+ +      return 0;
+ +}
+ +
+ +late_initcall(kprobe_trace_self_tests_init);
+ +
+ +#endif
diff --combined kernel/trace/trace_syscalls.c

index dfc55fed2099ba38c1d7ef0cc910082500b2927e,9fbce6c9d2e14610762039f0e484ca1509790c6d..1b050ab471200169bd8385b854bbd4a4aa853a1e
--- 1/kernel/trace/trace_syscalls.c
--- 2/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@@ -2,7 -2,7 +2,7 @@@
   #include <trace/events/syscalls.h>
   #include <linux/kernel.h>
   #include <linux/ftrace.h>
- #include <linux/perf_counter.h>
+ #include <linux/perf_event.h>
   #include <asm/syscall.h>
   
   #include "trace_output.h"
@@@ -285,13 -285,13 +285,13 @@@ void ftrace_syscall_exit(struct pt_reg
                 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
   }
   
- -int reg_event_syscall_enter(void *ptr)
+ +int reg_event_syscall_enter(struct ftrace_event_call *call)
   {
         int ret = 0;
         int num;
         char *name;
   
- -      name = (char *)ptr;
+ +      name = (char *)call->data;
         num = syscall_name_to_nr(name);
         if (num < 0 || num >= NR_syscalls)
                 return -ENOSYS;
@@@ -309,12 -309,12 +309,12 @@@
         return ret;
   }
   
- -void unreg_event_syscall_enter(void *ptr)
+ +void unreg_event_syscall_enter(struct ftrace_event_call *call)
   {
         int num;
         char *name;
   
- -      name = (char *)ptr;
+ +      name = (char *)call->data;
         num = syscall_name_to_nr(name);
         if (num < 0 || num >= NR_syscalls)
                 return;
@@@ -326,13 -326,13 +326,13 @@@
         mutex_unlock(&syscall_trace_lock);
   }
   
- -int reg_event_syscall_exit(void *ptr)
+ +int reg_event_syscall_exit(struct ftrace_event_call *call)
   {
         int ret = 0;
         int num;
         char *name;
   
- -      name = (char *)ptr;
+ +      name = call->data;
         num = syscall_name_to_nr(name);
         if (num < 0 || num >= NR_syscalls)
                 return -ENOSYS;
@@@ -350,12 -350,12 +350,12 @@@
         return ret;
   }
   
- -void unreg_event_syscall_exit(void *ptr)
+ +void unreg_event_syscall_exit(struct ftrace_event_call *call)
   {
         int num;
         char *name;
   
- -      name = (char *)ptr;
+ +      name = call->data;
         num = syscall_name_to_nr(name);
         if (num < 0 || num >= NR_syscalls)
                 return;
@@@ -384,10 -384,13 +384,13 @@@ static int sys_prof_refcount_exit
   
   static void prof_syscall_enter(struct pt_regs *regs, long id)
   {
-       struct syscall_trace_enter *rec;
         struct syscall_metadata *sys_data;
+       struct syscall_trace_enter *rec;
+       unsigned long flags;
+       char *raw_data;
         int syscall_nr;
         int size;
+       int cpu;
   
         syscall_nr = syscall_get_nr(current, regs);
         if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@@ -402,20 -405,38 +405,38 @@@
         size = ALIGN(size + sizeof(u32), sizeof(u64));
         size -= sizeof(u32);
   
-       do {
-               char raw_data[size];
+       if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+                     "profile buffer not large enough"))
+               return;
+ 
+       /* Protect the per cpu buffer, begin the rcu read side */
+       local_irq_save(flags);
   
-               /* zero the dead bytes from align to not leak stack to user */
-               *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+       cpu = smp_processor_id();
+ 
+       if (in_nmi())
+               raw_data = rcu_dereference(trace_profile_buf_nmi);
+       else
+               raw_data = rcu_dereference(trace_profile_buf);
+ 
+       if (!raw_data)
+               goto end;
   
-               rec = (struct syscall_trace_enter *) raw_data;
-               tracing_generic_entry_update(&rec->ent, 0, 0);
-               rec->ent.type = sys_data->enter_id;
-               rec->nr = syscall_nr;
-               syscall_get_arguments(current, regs, 0, sys_data->nb_args,
-                                      (unsigned long *)&rec->args);
-               perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
-       } while(0);
+       raw_data = per_cpu_ptr(raw_data, cpu);
+ 
+       /* zero the dead bytes from align to not leak stack to user */
+       *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ 
+       rec = (struct syscall_trace_enter *) raw_data;
+       tracing_generic_entry_update(&rec->ent, 0, 0);
+       rec->ent.type = sys_data->enter_id;
+       rec->nr = syscall_nr;
+       syscall_get_arguments(current, regs, 0, sys_data->nb_args,
+                              (unsigned long *)&rec->args);
+       perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+ 
+ end:
+       local_irq_restore(flags);
   }
   
   int reg_prof_syscall_enter(char *name)
@@@ -460,8 -481,12 +481,12 @@@ void unreg_prof_syscall_enter(char *nam
   static void prof_syscall_exit(struct pt_regs *regs, long ret)
   {
         struct syscall_metadata *sys_data;
-       struct syscall_trace_exit rec;
+       struct syscall_trace_exit *rec;
+       unsigned long flags;
         int syscall_nr;
+       char *raw_data;
+       int size;
+       int cpu;
   
         syscall_nr = syscall_get_nr(current, regs);
         if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@@ -471,12 -496,46 +496,46 @@@
         if (!sys_data)
                 return;
   
-       tracing_generic_entry_update(&rec.ent, 0, 0);
-       rec.ent.type = sys_data->exit_id;
-       rec.nr = syscall_nr;
-       rec.ret = syscall_get_return_value(current, regs);
+       /* We can probably do that at build time */
+       size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+       size -= sizeof(u32);
   
-       perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+       /*
+        * Impossible, but be paranoid with the future
+        * How to put this check outside runtime?
+        */
+       if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+               "exit event has grown above profile buffer size"))
+               return;
+ 
+       /* Protect the per cpu buffer, begin the rcu read side */
+       local_irq_save(flags);
+       cpu = smp_processor_id();
+ 
+       if (in_nmi())
+               raw_data = rcu_dereference(trace_profile_buf_nmi);
+       else
+               raw_data = rcu_dereference(trace_profile_buf);
+ 
+       if (!raw_data)
+               goto end;
+ 
+       raw_data = per_cpu_ptr(raw_data, cpu);
+ 
+       /* zero the dead bytes from align to not leak stack to user */
+       *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ 
+       rec = (struct syscall_trace_exit *)raw_data;
+ 
+       tracing_generic_entry_update(&rec->ent, 0, 0);
+       rec->ent.type = sys_data->exit_id;
+       rec->nr = syscall_nr;
+       rec->ret = syscall_get_return_value(current, regs);
+ 
+       perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+ 
+ end:
+       local_irq_restore(flags);
   }
   
   int reg_prof_syscall_exit(char *name)
author	Frederic Weisbecker <fweisbec@gmail.com>
	Wed, 23 Sep 2009 21:08:43 +0000 (23:08 +0200)
committer	Frederic Weisbecker <fweisbec@gmail.com>
	Wed, 23 Sep 2009 21:08:43 +0000 (23:08 +0200)
		1	2
arch/x86/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/entry_64.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/ptrace.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/lib/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/fault.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/ftrace_event.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/kprobes.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/syscalls.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/trace/ftrace.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/kprobes.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace_event_profile.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace_events.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace_export.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace_kprobe.c	patch \|	diff1 \|	\|	blob \| history
kernel/trace/trace_syscalls.c	patch \|	diff1 \|	diff2 \|	blob \| history