# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots:
- KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
- echo $(call cc-option,-fno-unit-at-a-time); fi ;)
+ KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
+ $(call cc-option,-fno-unit-at-a-time))
# CPU-specific tuning. Anything which can be shared with UML should go here.
include $(srctree)/arch/x86/Makefile_32.cpu
cflags-$(CONFIG_MCORE2) += \
$(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
+ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
+ $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
KBUILD_CFLAGS += $(cflags-y)
ifdef CONFIG_CC_STACKPROTECTOR
cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
- ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y)
+ ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y)
stackp-y := -fstack-protector
stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
KBUILD_CFLAGS += $(stackp-y)
KBUILD_IMAGE := $(boot)/bzImage
bzImage: vmlinux
+ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
+ $(Q)$(MAKE) $(build)=arch/x86/tools posttest
+endif
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
END(ftrace_graph_caller)
GLOBAL(return_to_handler)
- subq $80, %rsp
+ subq $24, %rsp
/* Save the return values */
movq %rax, (%rsp)
call ftrace_return_to_handler
- movq %rax, 72(%rsp)
+ movq %rax, 16(%rsp)
movq 8(%rsp), %rdx
movq (%rsp), %rax
- addq $72, %rsp
+ addq $16, %rsp
retq
#endif
bt $TIF_SYSCALL_AUDIT,%edx
jc sysret_audit
#endif
- /* edx: work flags (arg3) */
- leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
- xorl %esi,%esi # oldset -> arg2
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call do_notify_resume
- RESTORE_TOP_OF_STACK %r11
- RESTORE_REST
- movl $_TIF_WORK_MASK,%edi
- /* Use IRET because user could have changed frame. This
- works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- jmp int_with_check
+ /*
+ * We have a signal, or exit tracing or single-step.
+ * These all wind up with the iret return path anyway,
+ * so just join that path right now.
+ */
+ FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+ jmp int_check_syscall_exit_work
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+ int_check_syscall_exit_work:
SAVE_REST
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
call \func
.endm
+/*
+ * Interrupt entry/exit should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
* then jump to common_interrupt.
CFI_ENDPROC
END(common_interrupt)
+/*
+ * End of kprobes section
+ */
+ .popsection
/*
* APIC interrupts.
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
- #ifdef CONFIG_PERF_COUNTERS
+ #ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
perf_pending_interrupt smp_perf_pending_interrupt
#endif
REGSET_IOPERM32,
};
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+#ifdef CONFIG_X86_64
+ REG_OFFSET_NAME(r15),
+ REG_OFFSET_NAME(r14),
+ REG_OFFSET_NAME(r13),
+ REG_OFFSET_NAME(r12),
+ REG_OFFSET_NAME(r11),
+ REG_OFFSET_NAME(r10),
+ REG_OFFSET_NAME(r9),
+ REG_OFFSET_NAME(r8),
+#endif
+ REG_OFFSET_NAME(bx),
+ REG_OFFSET_NAME(cx),
+ REG_OFFSET_NAME(dx),
+ REG_OFFSET_NAME(si),
+ REG_OFFSET_NAME(di),
+ REG_OFFSET_NAME(bp),
+ REG_OFFSET_NAME(ax),
+#ifdef CONFIG_X86_32
+ REG_OFFSET_NAME(ds),
+ REG_OFFSET_NAME(es),
+ REG_OFFSET_NAME(fs),
+ REG_OFFSET_NAME(gs),
+#endif
+ REG_OFFSET_NAME(orig_ax),
+ REG_OFFSET_NAME(ip),
+ REG_OFFSET_NAME(cs),
+ REG_OFFSET_NAME(flags),
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(ss),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset: the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (roff->offset == offset)
+ return roff->name;
+ return NULL;
+}
+
+static const int arg_offs_table[] = {
+#ifdef CONFIG_X86_32
+ [0] = offsetof(struct pt_regs, ax),
+ [1] = offsetof(struct pt_regs, dx),
+ [2] = offsetof(struct pt_regs, cx)
+#else /* CONFIG_X86_64 */
+ [0] = offsetof(struct pt_regs, di),
+ [1] = offsetof(struct pt_regs, si),
+ [2] = offsetof(struct pt_regs, dx),
+ [3] = offsetof(struct pt_regs, cx),
+ [4] = offsetof(struct pt_regs, r8),
+ [5] = offsetof(struct pt_regs, r9)
+#endif
+};
+
+/**
+ * regs_get_argument_nth() - get Nth argument at function call
+ * @regs: pt_regs which contains registers at function entry.
+ * @n: argument number.
+ *
+ * regs_get_argument_nth() returns @n th argument of a function call.
+ * Since usually the kernel stack will be changed right after function entry,
+ * you must use this at function entry. If the @n th entry is NOT in the
+ * kernel stack or pt_regs, this returns 0.
+ */
+unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
+{
+ if (n < ARRAY_SIZE(arg_offs_table))
+ return *(unsigned long *)((char *)regs + arg_offs_table[n]);
+ else {
+ /*
+ * The typical case: arg n is on the stack.
+ * (Note: stack[0] = return address, so skip it)
+ */
+ n -= ARRAY_SIZE(arg_offs_table);
+ return regs_get_kernel_stack_nth(regs, 1 + n);
+ }
+}
+
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
return set_flags(child, value);
#ifdef CONFIG_X86_64
- /*
- * Orig_ax is really just a flag with small positive and
- * negative values, so make sure to always sign-extend it
- * from 32 bits so that it works correctly regardless of
- * whether we come from a 32-bit environment or not.
- */
- case offsetof(struct user_regs_struct, orig_ax):
- value = (long) (s32) value;
- break;
-
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_OF(child))
return -EIO;
case offsetof(struct user32, regs.orig_eax):
/*
- * Sign-extend the value so that orig_eax = -1
- * causes (long)orig_ax < 0 tests to fire correctly.
+ * A 32-bit debugger setting orig_eax means to restore
+ * the state of the task restarting a 32-bit syscall.
+ * Make sure we interpret the -ERESTART* codes correctly
+ * in case the task is not actually still sitting at the
+ * exit from a 32-bit syscall with TS_COMPAT still set.
*/
- regs->orig_ax = (long) (s32) value;
+ regs->orig_ax = value;
+ if (syscall_get_nr(child, regs) >= 0)
+ task_thread_info(child)->status |= TS_COMPAT;
break;
case offsetof(struct user32, regs.eflags):
# Makefile for x86 specific library files.
#
+inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
+inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
+quiet_cmd_inat_tables = GEN $@
+ cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+
+$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+ $(call cmd,inat_tables)
+
+$(obj)/inat.o: $(obj)/inat-tables.c
+
+clean-files := inat-tables.c
+
obj-$(CONFIG_SMP) := msr.o
lib-y := delay.o
lib-y += thunk_$(BITS).o
lib-y += usercopy_$(BITS).o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
+lib-y += insn.o inat.o
+ obj-y += msr-reg.o msr-reg-export.o
+
ifeq ($(CONFIG_X86_32),y)
obj-y += atomic64_32.o
lib-y += checksum_32.o
#include <linux/bootmem.h> /* max_low_pfn */
#include <linux/kprobes.h> /* __kprobes, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
- #include <linux/perf_counter.h> /* perf_swcounter_event */
+ #include <linux/perf_event.h> /* perf_sw_event */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
* Returns 0 if mmiotrace is disabled, or if the fault is not
* handled by mmiotrace:
*/
-static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+static inline int __kprobes
+kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return 0;
}
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes notify_page_fault(struct pt_regs *regs)
{
int ret = 0;
*
* Handle a fault on the vmalloc or module mapping area
*/
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
{
unsigned long pgd_paddr;
pmd_t *pmd_k;
tsk->thread.screen_bitmap |= 1 << bit;
}
- static void dump_pagetable(unsigned long address)
+ static bool low_pfn(unsigned long pfn)
{
- __typeof__(pte_val(__pte(0))) page;
+ return pfn < max_low_pfn;
+ }
- page = read_cr3();
- page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+ static void dump_pagetable(unsigned long address)
+ {
+ pgd_t *base = __va(read_cr3());
+ pgd_t *pgd = &base[pgd_index(address)];
+ pmd_t *pmd;
+ pte_t *pte;
#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", page);
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && page & _PAGE_PRESENT) {
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
- & (PTRS_PER_PMD - 1)];
- printk(KERN_CONT "*pde = %016Lx ", page);
- page &= ~_PAGE_NX;
- }
- #else
- printk("*pde = %08lx ", page);
+ printk("*pdpt = %016Lx ", pgd_val(*pgd));
+ if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+ goto out;
#endif
+ pmd = pmd_offset(pud_offset(pgd, address), address);
+ printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
/*
* We must not directly access the pte in the highpte
* And let's rather not kmap-atomic the pte, just in case
* it's allocated already:
*/
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && (page & _PAGE_PRESENT)
- && !(page & _PAGE_PSE)) {
-
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
- printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
- }
+ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+ goto out;
+ pte = pte_offset_kernel(pmd, address);
+ printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+ out:
printk("\n");
}
*
* This assumes no large pages in there.
*/
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
{
pgd_t *pgd, *pgd_ref;
pud_t *pud, *pud_ref;
static void dump_pagetable(unsigned long address)
{
- pgd_t *pgd;
+ pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd_t *pgd = base + pgd_index(address);
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- pgd = (pgd_t *)read_cr3();
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-
- pgd += pgd_index(address);
if (bad_address(pgd))
goto bad;
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
*/
-static noinline int
+static noinline __kprobes int
spurious_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/*
* If we're in an interrupt, have no user context or are running
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
} else {
tsk->min_flt++;
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
#ifndef _LINUX_FTRACE_EVENT_H
#define _LINUX_FTRACE_EVENT_H
- #include <linux/trace_seq.h>
#include <linux/ring_buffer.h>
+ #include <linux/trace_seq.h>
#include <linux/percpu.h>
+ #include <linux/hardirq.h>
struct trace_array;
struct tracer;
unsigned char flags;
unsigned char preempt_count;
int pid;
- int tgid;
+ int lock_depth;
};
#define FTRACE_MAX_EVENT \
struct dentry *dir;
struct trace_event *event;
int enabled;
- int (*regfunc)(void *);
- void (*unregfunc)(void *);
+ int (*regfunc)(struct ftrace_event_call *);
+ void (*unregfunc)(struct ftrace_event_call *);
int id;
- int (*raw_init)(void);
- int (*show_format)(struct ftrace_event_call *call,
- struct trace_seq *s);
+ int (*raw_init)(struct ftrace_event_call *);
+ int (*show_format)(struct ftrace_event_call *,
+ struct trace_seq *);
int (*define_fields)(struct ftrace_event_call *);
struct list_head fields;
int filter_active;
void *data;
atomic_t profile_count;
- int (*profile_enable)(void);
- void (*profile_disable)(void);
+ int (*profile_enable)(struct ftrace_event_call *);
+ void (*profile_disable)(struct ftrace_event_call *);
};
+ #define FTRACE_MAX_PROFILE_SIZE 2048
+
+ extern char *trace_profile_buf;
+ extern char *trace_profile_buf_nmi;
+
#define MAX_FILTER_PRED 32
- #define MAX_FILTER_STR_VAL 128
+ #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
extern void destroy_preds(struct ftrace_event_call *call);
extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
FILTER_PTR_STRING,
};
-extern int trace_define_field(struct ftrace_event_call *call,
- const char *type, const char *name,
- int offset, int size, int is_signed,
- int filter_type);
extern int trace_define_common_fields(struct ftrace_event_call *call);
+extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+ const char *name, int offset, int size,
+ int is_signed, int filter_type);
+extern int trace_add_event_call(struct ftrace_event_call *call);
+extern void trace_remove_event_call(struct ftrace_event_call *call);
#define is_signed_type(type) (((type)(-1)) < 0)
#define KPROBE_HIT_SSDONE 0x00000008
/* Attach to insert probes on any functions which should be ignored*/
- #define __kprobes __attribute__((__section__(".kprobes.text"))) notrace
+ #define __kprobes __attribute__((__section__(".kprobes.text")))
#else /* CONFIG_KPROBES */
typedef int kprobe_opcode_t;
struct arch_specific_insn {
int dummy;
};
- #define __kprobes notrace
+ #define __kprobes
#endif /* CONFIG_KPROBES */
struct kprobe;
int disable_kprobe(struct kprobe *kp);
int enable_kprobe(struct kprobe *kp);
+void dump_kprobe(struct kprobe *kp);
+
#else /* !CONFIG_KPROBES: */
static inline int kprobes_built_in(void)
struct robust_list_head;
struct getcpu_cache;
struct old_linux_dirent;
- struct perf_counter_attr;
+ struct perf_event_attr;
#include <linux/types.h>
#include <linux/aio_abi.h>
#ifdef CONFIG_EVENT_PROFILE
#define TRACE_SYS_ENTER_PROFILE(sname) \
- static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \
-static int prof_sysenter_enable_##sname(void) \
++static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \
{ \
- int ret = 0; \
- if (!atomic_inc_return(&event_enter_##sname.profile_count)) \
- ret = reg_prof_syscall_enter("sys"#sname); \
- return ret; \
+ return reg_prof_syscall_enter("sys"#sname); \
} \
\
- static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\
-static void prof_sysenter_disable_##sname(void) \
++static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \
{ \
- if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \
- unreg_prof_syscall_enter("sys"#sname); \
+ unreg_prof_syscall_enter("sys"#sname); \
}
#define TRACE_SYS_EXIT_PROFILE(sname) \
- static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \
-static int prof_sysexit_enable_##sname(void) \
++static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \
{ \
- int ret = 0; \
- if (!atomic_inc_return(&event_exit_##sname.profile_count)) \
- ret = reg_prof_syscall_exit("sys"#sname); \
- return ret; \
+ return reg_prof_syscall_exit("sys"#sname); \
} \
\
- static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
-static void prof_sysexit_disable_##sname(void) \
++static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \
{ \
- if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \
- unreg_prof_syscall_exit("sys"#sname); \
+ unreg_prof_syscall_exit("sys"#sname); \
}
#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \
struct trace_event enter_syscall_print_##sname = { \
.trace = print_syscall_enter, \
}; \
- static int init_enter_##sname(void) \
+ static int init_enter_##sname(struct ftrace_event_call *call) \
{ \
int num, id; \
num = syscall_name_to_nr("sys"#sname); \
struct trace_event exit_syscall_print_##sname = { \
.trace = print_syscall_exit, \
}; \
- static int init_exit_##sname(void) \
+ static int init_exit_##sname(struct ftrace_event_call *call) \
{ \
int num, id; \
num = syscall_name_to_nr("sys"#sname); \
void __user *data);
asmlinkage long sys_umount(char __user *name, int flags);
asmlinkage long sys_oldumount(char __user *name);
- asmlinkage long sys_truncate(const char __user *path,
- unsigned long length);
+ asmlinkage long sys_truncate(const char __user *path, long length);
asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
asmlinkage long sys_stat(char __user *filename,
struct __old_kernel_stat __user *statbuf);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
- asmlinkage long sys_perf_counter_open(
- struct perf_counter_attr __user *attr_uptr,
+ asmlinkage long sys_perf_event_open(
+ struct perf_event_attr __user *attr_uptr,
pid_t pid, int cpu, int group_fd, unsigned long flags);
#endif
#undef __print_flags
#define __print_flags(flag, delim, flag_array...) \
({ \
- static const struct trace_print_flags flags[] = \
+ static const struct trace_print_flags __flags[] = \
{ flag_array, { -1, NULL }}; \
- ftrace_print_flags_seq(p, delim, flag, flags); \
+ ftrace_print_flags_seq(p, delim, flag, __flags); \
})
#undef __print_symbolic
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
- enum print_line_t \
+ static enum print_line_t \
ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
{ \
struct trace_seq *s = &iter->seq; \
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
- int \
+ static int \
ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
{ \
struct ftrace_raw_##call field; \
#ifdef CONFIG_EVENT_PROFILE
/*
- * Generate the functions needed for tracepoint perf_counter support.
+ * Generate the functions needed for tracepoint perf_event support.
*
* NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
*
- * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
+ * static int ftrace_profile_enable_<call>(void)
* {
- * int ret = 0;
- *
- * if (!atomic_inc_return(&event_call->profile_count))
- * ret = register_trace_<call>(ftrace_profile_<call>);
- *
- * return ret;
+ * return register_trace_<call>(ftrace_profile_<call>);
* }
*
- * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
+ * static void ftrace_profile_disable_<call>(void)
* {
- * if (atomic_add_negative(-1, &event->call->profile_count))
- * unregister_trace_<call>(ftrace_profile_<call>);
+ * unregister_trace_<call>(ftrace_profile_<call>);
* }
*
*/
\
static void ftrace_profile_##call(proto); \
\
- static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
-static int ftrace_profile_enable_##call(void) \
++static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\
{ \
- int ret = 0; \
- \
- if (!atomic_inc_return(&event_call->profile_count)) \
- ret = register_trace_##call(ftrace_profile_##call); \
- \
- return ret; \
+ return register_trace_##call(ftrace_profile_##call); \
} \
\
- static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
-static void ftrace_profile_disable_##call(void) \
++static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
{ \
- if (atomic_add_negative(-1, &event_call->profile_count)) \
- unregister_trace_##call(ftrace_profile_##call); \
+ unregister_trace_##call(ftrace_profile_##call); \
}
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
* event_trace_printk(_RET_IP_, "<call>: " <fmt>);
* }
*
- * static int ftrace_reg_event_<call>(void)
+ * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused)
* {
* int ret;
*
* return ret;
* }
*
- * static void ftrace_unreg_event_<call>(void)
+ * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
* {
* unregister_trace_<call>(ftrace_event_<call>);
* }
* trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc);
* }
*
- * static int ftrace_raw_reg_event_<call>(void)
+ * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
* {
* int ret;
*
* return ret;
* }
*
- * static void ftrace_unreg_event_<call>(void)
+ * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
* {
* unregister_trace_<call>(ftrace_raw_event_<call>);
* }
* .trace = ftrace_raw_output_<call>, <-- stage 2
* };
*
- * static int ftrace_raw_init_event_<call>(void)
+ * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
* {
* int id;
*
event, irq_flags, pc); \
} \
\
-static int ftrace_raw_reg_event_##call(void *ptr) \
+static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
{ \
int ret; \
\
return ret; \
} \
\
-static void ftrace_raw_unreg_event_##call(void *ptr) \
+static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
{ \
unregister_trace_##call(ftrace_raw_event_##call); \
} \
.trace = ftrace_raw_output_##call, \
}; \
\
-static int ftrace_raw_init_event_##call(void) \
+static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
{ \
int id; \
\
* {
* struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
* struct ftrace_event_call *event_call = &event_<call>;
- * extern void perf_tpcounter_event(int, u64, u64, void *, int);
+ * extern void perf_tp_event(int, u64, u64, void *, int);
* struct ftrace_raw_##call *entry;
* u64 __addr = 0, __count = 1;
* unsigned long irq_flags;
+ * struct trace_entry *ent;
* int __entry_size;
* int __data_size;
+ * int __cpu
* int pc;
*
- * local_save_flags(irq_flags);
* pc = preempt_count();
*
* __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
* sizeof(u64));
* __entry_size -= sizeof(u32);
*
- * do {
- * char raw_data[__entry_size]; <- allocate our sample in the stack
- * struct trace_entry *ent;
+ * // Protect the non nmi buffer
+ * // This also protects the rcu read side
+ * local_irq_save(irq_flags);
+ * __cpu = smp_processor_id();
+ *
+ * if (in_nmi())
+ * raw_data = rcu_dereference(trace_profile_buf_nmi);
+ * else
+ * raw_data = rcu_dereference(trace_profile_buf);
+ *
+ * if (!raw_data)
+ * goto end;
*
- * zero dead bytes from alignment to avoid stack leak to userspace:
+ * raw_data = per_cpu_ptr(raw_data, __cpu);
*
- * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
- * entry = (struct ftrace_raw_<call> *)raw_data;
- * ent = &entry->ent;
- * tracing_generic_entry_update(ent, irq_flags, pc);
- * ent->type = event_call->id;
+ * //zero dead bytes from alignment to avoid stack leak to userspace:
+ * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
+ * entry = (struct ftrace_raw_<call> *)raw_data;
+ * ent = &entry->ent;
+ * tracing_generic_entry_update(ent, irq_flags, pc);
+ * ent->type = event_call->id;
*
- * <tstruct> <- do some jobs with dynamic arrays
+ * <tstruct> <- do some jobs with dynamic arrays
*
- * <assign> <- affect our values
+ * <assign> <- affect our values
*
- * perf_tpcounter_event(event_call->id, __addr, __count, entry,
- * __entry_size); <- submit them to perf counter
- * } while (0);
+ * perf_tp_event(event_call->id, __addr, __count, entry,
+ * __entry_size); <- submit them to perf counter
*
* }
*/
{ \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ftrace_event_call *event_call = &event_##call; \
- extern void perf_tpcounter_event(int, u64, u64, void *, int); \
+ extern void perf_tp_event(int, u64, u64, void *, int); \
struct ftrace_raw_##call *entry; \
u64 __addr = 0, __count = 1; \
unsigned long irq_flags; \
+ struct trace_entry *ent; \
int __entry_size; \
int __data_size; \
+ char *raw_data; \
+ int __cpu; \
int pc; \
\
- local_save_flags(irq_flags); \
pc = preempt_count(); \
\
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
sizeof(u64)); \
__entry_size -= sizeof(u32); \
\
- do { \
- char raw_data[__entry_size]; \
- struct trace_entry *ent; \
+ if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \
+ "profile buffer not large enough")) \
+ return; \
+ \
+ local_irq_save(irq_flags); \
+ __cpu = smp_processor_id(); \
\
- *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
- entry = (struct ftrace_raw_##call *)raw_data; \
- ent = &entry->ent; \
- tracing_generic_entry_update(ent, irq_flags, pc); \
- ent->type = event_call->id; \
+ if (in_nmi()) \
+ raw_data = rcu_dereference(trace_profile_buf_nmi); \
+ else \
+ raw_data = rcu_dereference(trace_profile_buf); \
\
- tstruct \
+ if (!raw_data) \
+ goto end; \
\
- { assign; } \
+ raw_data = per_cpu_ptr(raw_data, __cpu); \
\
- perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
+ entry = (struct ftrace_raw_##call *)raw_data; \
+ ent = &entry->ent; \
+ tracing_generic_entry_update(ent, irq_flags, pc); \
+ ent->type = event_call->id; \
+ \
+ tstruct \
+ \
+ { assign; } \
+ \
+ perf_tp_event(event_call->id, __addr, __count, entry, \
__entry_size); \
- } while (0); \
+ \
+ end: \
+ local_irq_restore(irq_flags); \
\
}
*/
static struct kprobe_blackpoint kprobe_blacklist[] = {
{"preempt_schedule",},
+ {"native_get_debugreg",},
+ {"irq_entries_start",},
+ {"common_interrupt",},
{NULL} /* Terminator */
};
return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}
+/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+{
+ struct kprobe *old_p, *list_p;
+
+ old_p = get_kprobe(p->addr);
+ if (unlikely(!old_p))
+ return NULL;
+
+ if (p != old_p) {
+ list_for_each_entry_rcu(list_p, &old_p->list, list)
+ if (list_p == p)
+ /* kprobe p is a valid probe */
+ goto valid;
+ return NULL;
+ }
+valid:
+ return old_p;
+}
+
+/* Return error if the kprobe is being re-registered */
+static inline int check_kprobe_rereg(struct kprobe *p)
+{
+ int ret = 0;
+ struct kprobe *old_p;
+
+ mutex_lock(&kprobe_mutex);
+ old_p = __get_valid_kprobe(p);
+ if (old_p)
+ ret = -EINVAL;
+ mutex_unlock(&kprobe_mutex);
+ return ret;
+}
+
int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
return -EINVAL;
p->addr = addr;
+ ret = check_kprobe_rereg(p);
+ if (ret)
+ return ret;
+
preempt_disable();
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr)) {
}
EXPORT_SYMBOL_GPL(register_kprobe);
-/* Check passed kprobe is valid and return kprobe in kprobe_table. */
-static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
-{
- struct kprobe *old_p, *list_p;
-
- old_p = get_kprobe(p->addr);
- if (unlikely(!old_p))
- return NULL;
-
- if (p != old_p) {
- list_for_each_entry_rcu(list_p, &old_p->list, list)
- if (list_p == p)
- /* kprobe p is a valid probe */
- goto valid;
- return NULL;
- }
-valid:
- return old_p;
-}
-
/*
* Unregister a kprobe without a scheduler synchronization.
*/
arch_remove_kprobe(p);
}
+void __kprobes dump_kprobe(struct kprobe *kp)
+{
+ printk(KERN_WARNING "Dumping kprobe:\n");
+ printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
+ kp->symbol_name, kp->addr, kp->offset);
+}
+
/* Module notifier call back, checking kprobes on the module */
static int __kprobes kprobes_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
return 0;
}
- static struct seq_operations kprobes_seq_ops = {
+ static const struct seq_operations kprobes_seq_ops = {
.start = kprobe_seq_start,
.next = kprobe_seq_next,
.stop = kprobe_seq_stop,
config HAVE_FTRACE_NMI_ENTER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_TRACER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_GRAPH_TRACER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_GRAPH_FP_TEST
bool
config HAVE_FUNCTION_TRACE_MCOUNT_TEST
bool
help
- This gets selected when the arch tests the function_trace_stop
- variable at the mcount call site. Otherwise, this variable
- is tested by the called function.
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_DYNAMIC_FTRACE
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FTRACE_MCOUNT_RECORD
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_HW_BRANCH_TRACER
bool
config HAVE_SYSCALL_TRACEPOINTS
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config TRACER_MAX_TRACE
bool
# This allows those options to appear when no other tracer is selected. But the
# options do not appear when something else selects it. We need the two options
# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
- # hidding of the automatic options options.
+ # hidding of the automatic options.
config TRACING
bool
If unsure, say N.
+config KPROBE_TRACER
+ depends on KPROBES
+ depends on X86
+ bool "Trace kprobes"
+ select TRACING
+ select GENERIC_TRACER
+ help
+ This tracer probes everywhere where kprobes can probe it, and
+ records various registers and memories specified by user.
+ This also allows you to trace kprobe probe points as a dynamic
+ defined events. It provides per-probe event filtering interface.
+
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FUNCTION_TRACER
functioning properly. It will do tests on all the configured
tracers of ftrace.
+ config EVENT_TRACE_TEST_SYSCALLS
+ bool "Run selftest on syscall events"
+ depends on FTRACE_STARTUP_TEST
+ help
+ This option will also enable testing every syscall event.
+ It only enables the event and disables it and runs various loads
+ with the event enabled. This adds a bit more time for kernel boot
+ up since it runs this on every system call defined.
+
+ TBD - enable a way to actually call the syscalls as we test their
+ events
+
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on HAVE_MMIOTRACE_SUPPORT && PCI
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
- obj-$(CONFIG_POWER_TRACER) += trace_power.o
obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_KPROBE_TRACER) += trace_kprobe.o
+ obj-$(CONFIG_EVENT_TRACING) += power-traces.o
libftrace-y := ftrace.o
#include <linux/clocksource.h>
#include <linux/ring_buffer.h>
#include <linux/mmiotrace.h>
+ #include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <trace/boot.h>
#include <linux/kmemtrace.h>
- #include <trace/power.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
TRACE_HW_BRANCHES,
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
- TRACE_POWER,
TRACE_BLK,
__TRACE_LAST_TYPE,
};
- /*
- * Function trace entry - function address and parent function addres:
- */
- struct ftrace_entry {
- struct trace_entry ent;
- unsigned long ip;
- unsigned long parent_ip;
- };
-
- /* Function call entry */
- struct ftrace_graph_ent_entry {
- struct trace_entry ent;
- struct ftrace_graph_ent graph_ent;
+ enum kmemtrace_type_id {
+ KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
+ KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
+ KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
};
- /* Function return entry */
- struct ftrace_graph_ret_entry {
- struct trace_entry ent;
- struct ftrace_graph_ret ret;
- };
extern struct tracer boot_tracer;
- /*
- * Context switch trace entry - which task (and prio) we switched from/to:
- */
- struct ctx_switch_entry {
- struct trace_entry ent;
- unsigned int prev_pid;
- unsigned char prev_prio;
- unsigned char prev_state;
- unsigned int next_pid;
- unsigned char next_prio;
- unsigned char next_state;
- unsigned int next_cpu;
- };
-
- /*
- * Special (free-form) trace entry:
- */
- struct special_entry {
- struct trace_entry ent;
- unsigned long arg1;
- unsigned long arg2;
- unsigned long arg3;
- };
-
- /*
- * Stack-trace entry:
- */
-
- #define FTRACE_STACK_ENTRIES 8
+ #undef __field
+ #define __field(type, item) type item;
- struct stack_entry {
- struct trace_entry ent;
- unsigned long caller[FTRACE_STACK_ENTRIES];
- };
+ #undef __field_struct
+ #define __field_struct(type, item) __field(type, item)
- struct userstack_entry {
- struct trace_entry ent;
- unsigned long caller[FTRACE_STACK_ENTRIES];
- };
+ #undef __field_desc
+ #define __field_desc(type, container, item)
- /*
- * trace_printk entry:
- */
- struct bprint_entry {
- struct trace_entry ent;
- unsigned long ip;
- const char *fmt;
- u32 buf[];
- };
+ #undef __array
+ #define __array(type, item, size) type item[size];
- struct print_entry {
- struct trace_entry ent;
- unsigned long ip;
- char buf[];
- };
-
- #define TRACE_OLD_SIZE 88
-
- struct trace_field_cont {
- unsigned char type;
- /* Temporary till we get rid of this completely */
- char buf[TRACE_OLD_SIZE - 1];
- };
+ #undef __array_desc
+ #define __array_desc(type, container, item, size)
- struct trace_mmiotrace_rw {
- struct trace_entry ent;
- struct mmiotrace_rw rw;
- };
+ #undef __dynamic_array
+ #define __dynamic_array(type, item) type item[];
- struct trace_mmiotrace_map {
- struct trace_entry ent;
- struct mmiotrace_map map;
- };
-
- struct trace_boot_call {
- struct trace_entry ent;
- struct boot_trace_call boot_call;
- };
+ #undef F_STRUCT
+ #define F_STRUCT(args...) args
- struct trace_boot_ret {
- struct trace_entry ent;
- struct boot_trace_ret boot_ret;
- };
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
+ struct struct_name { \
+ struct trace_entry ent; \
+ tstruct \
+ }
- #define TRACE_FUNC_SIZE 30
- #define TRACE_FILE_SIZE 20
- struct trace_branch {
- struct trace_entry ent;
- unsigned line;
- char func[TRACE_FUNC_SIZE+1];
- char file[TRACE_FILE_SIZE+1];
- char correct;
- };
+ #undef TP_ARGS
+ #define TP_ARGS(args...) args
- struct hw_branch_entry {
- struct trace_entry ent;
- u64 from;
- u64 to;
- };
+ #undef FTRACE_ENTRY_DUP
+ #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
- struct trace_power {
- struct trace_entry ent;
- struct power_trace state_data;
- };
-
- enum kmemtrace_type_id {
- KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
- KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
- KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
- };
-
- struct kmemtrace_alloc_entry {
- struct trace_entry ent;
- enum kmemtrace_type_id type_id;
- unsigned long call_site;
- const void *ptr;
- size_t bytes_req;
- size_t bytes_alloc;
- gfp_t gfp_flags;
- int node;
- };
-
- struct kmemtrace_free_entry {
- struct trace_entry ent;
- enum kmemtrace_type_id type_id;
- unsigned long call_site;
- const void *ptr;
- };
+ #include "trace_entries.h"
+ /*
+ * syscalls are special, and need special handling, this is why
+ * they are not included in trace_entries.h
+ */
struct syscall_trace_enter {
struct trace_entry ent;
int nr;
unsigned long ret;
};
-
-
+struct kprobe_trace_entry {
+ struct trace_entry ent;
+ unsigned long ip;
+ int nargs;
+ unsigned long args[];
+};
+
+#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
+ (offsetof(struct kprobe_trace_entry, args) + \
+ (sizeof(unsigned long) * (n)))
+
+struct kretprobe_trace_entry {
+ struct trace_entry ent;
+ unsigned long func;
+ unsigned long ret_ip;
+ int nargs;
+ unsigned long args[];
+};
+
+#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
+ (offsetof(struct kretprobe_trace_entry, args) + \
+ (sizeof(unsigned long) * (n)))
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
* IRQS_OFF - interrupts were disabled
* IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
- * NEED_RESCED - reschedule is requested
+ * NEED_RESCHED - reschedule is requested
* HARDIRQ - inside an interrupt handler
* SOFTIRQ - inside a softirq handler
*/
IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
- IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
struct tracer *next;
int print_max;
struct tracer_flags *flags;
- struct tracer_stat *stats;
};
void tracing_start_sched_switch_record(void);
int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
+ int is_tracing_stopped(void);
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
extern cycle_t ftrace_now(int cpu);
- #ifdef CONFIG_CONTEXT_SWITCH_TRACER
- typedef void
- (*tracer_switch_func_t)(void *private,
- void *__rq,
- struct task_struct *prev,
- struct task_struct *next);
-
- struct tracer_switch_ops {
- tracer_switch_func_t func;
- void *private;
- struct tracer_switch_ops *next;
- };
- #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
-
extern void trace_find_cmdline(int pid, char comm[]);
#ifdef CONFIG_DYNAMIC_FTRACE
}
#endif
+ /*
+ * struct trace_parser - servers for reading the user input separated by spaces
+ * @cont: set if the input is not complete - no final space char was found
+ * @buffer: holds the parsed user input
+ * @idx: user input lenght
+ * @size: buffer size
+ */
+ struct trace_parser {
+ bool cont;
+ char *buffer;
+ unsigned idx;
+ unsigned size;
+ };
+
+ static inline bool trace_parser_loaded(struct trace_parser *parser)
+ {
+ return (parser->idx != 0);
+ }
+
+ static inline bool trace_parser_cont(struct trace_parser *parser)
+ {
+ return parser->cont;
+ }
+
+ static inline void trace_parser_clear(struct trace_parser *parser)
+ {
+ parser->cont = false;
+ parser->idx = 0;
+ }
+
+ extern int trace_parser_get_init(struct trace_parser *parser, int size);
+ extern void trace_parser_put(struct trace_parser *parser);
+ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
+ size_t cnt, loff_t *ppos);
+
/*
* trace_iterator_flags is an enumeration that defines bit
* positions into trace_flags that controls the output.
return 0;
}
- #define DEFINE_COMPARISON_PRED(type) \
- static int filter_pred_##type(struct filter_pred *pred, void *event, \
- int val1, int val2) \
- { \
- type *addr = (type *)(event + pred->offset); \
- type val = (type)pred->val; \
- int match = 0; \
- \
- switch (pred->op) { \
- case OP_LT: \
- match = (*addr < val); \
- break; \
- case OP_LE: \
- match = (*addr <= val); \
- break; \
- case OP_GT: \
- match = (*addr > val); \
- break; \
- case OP_GE: \
- match = (*addr >= val); \
- break; \
- default: \
- break; \
- } \
- \
- return match; \
- }
-
- #define DEFINE_EQUALITY_PRED(size) \
- static int filter_pred_##size(struct filter_pred *pred, void *event, \
- int val1, int val2) \
- { \
- u##size *addr = (u##size *)(event + pred->offset); \
- u##size val = (u##size)pred->val; \
- int match; \
- \
- match = (val == *addr) ^ pred->not; \
- \
- return match; \
- }
-
extern struct mutex event_mutex;
extern struct list_head ftrace_events;
extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
extern struct ftrace_event_call event_##call;
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
- #include "trace_event_types.h"
+ #undef FTRACE_ENTRY_DUP
+ #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
+ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
+ #include "trace_entries.h"
#endif /* _LINUX_KERNEL_TRACE_H */
*
*/
+ #include <linux/module.h>
#include "trace.h"
- ret = event->profile_enable();
+ /*
+ * We can't use a size but a type in alloc_percpu()
+ * So let's create a dummy type that matches the desired size
+ */
+ typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
+
+ char *trace_profile_buf;
+ EXPORT_SYMBOL_GPL(trace_profile_buf);
+
+ char *trace_profile_buf_nmi;
+ EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+
+ /* Count the events in use (per event id, not per instance) */
+ static int total_profile_count;
+
+ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
+ {
+ char *buf;
+ int ret = -ENOMEM;
+
+ if (atomic_inc_return(&event->profile_count))
+ return 0;
+
+ if (!total_profile_count++) {
+ buf = (char *)alloc_percpu(profile_buf_t);
+ if (!buf)
+ goto fail_buf;
+
+ rcu_assign_pointer(trace_profile_buf, buf);
+
+ buf = (char *)alloc_percpu(profile_buf_t);
+ if (!buf)
+ goto fail_buf_nmi;
+
+ rcu_assign_pointer(trace_profile_buf_nmi, buf);
+ }
+
++ ret = event->profile_enable(event);
+ if (!ret)
+ return 0;
+
+ kfree(trace_profile_buf_nmi);
+ fail_buf_nmi:
+ kfree(trace_profile_buf);
+ fail_buf:
+ total_profile_count--;
+ atomic_dec(&event->profile_count);
+
+ return ret;
+ }
+
int ftrace_profile_enable(int event_id)
{
struct ftrace_event_call *event;
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
- if (event->id == event_id && event->profile_enable) {
- ret = event->profile_enable(event);
+ if (event->id == event_id && event->profile_enable &&
+ try_module_get(event->mod)) {
+ ret = ftrace_profile_enable_event(event);
break;
}
}
return ret;
}
- event->profile_disable();
+ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
+ {
+ char *buf, *nmi_buf;
+
+ if (!atomic_add_negative(-1, &event->profile_count))
+ return;
+
++ event->profile_disable(event);
+
+ if (!--total_profile_count) {
+ buf = trace_profile_buf;
+ rcu_assign_pointer(trace_profile_buf, NULL);
+
+ nmi_buf = trace_profile_buf_nmi;
+ rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+
+ /*
+ * Ensure every events in profiling have finished before
+ * releasing the buffers
+ */
+ synchronize_sched();
+
+ free_percpu(buf);
+ free_percpu(nmi_buf);
+ }
+ }
+
void ftrace_profile_disable(int event_id)
{
struct ftrace_event_call *event;
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id) {
- event->profile_disable(event);
+ ftrace_profile_disable_event(event);
+ module_put(event->mod);
break;
}
}
#include "trace_output.h"
+ #undef TRACE_SYSTEM
#define TRACE_SYSTEM "TRACE_SYSTEM"
DEFINE_MUTEX(event_mutex);
__common_field(unsigned char, flags);
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
- __common_field(int, tgid);
+ __common_field(int, lock_depth);
return ret;
}
EXPORT_SYMBOL_GPL(trace_define_common_fields);
-#ifdef CONFIG_MODULES
-
-static void trace_destroy_fields(struct ftrace_event_call *call)
+void trace_destroy_fields(struct ftrace_event_call *call)
{
struct ftrace_event_field *field, *next;
}
}
-#endif /* CONFIG_MODULES */
-
static void ftrace_event_enable_disable(struct ftrace_event_call *call,
int enable)
{
if (call->enabled) {
call->enabled = 0;
tracing_stop_cmdline_record();
- call->unregfunc(call->data);
+ call->unregfunc(call);
}
break;
case 1:
if (!call->enabled) {
call->enabled = 1;
tracing_start_cmdline_record();
- call->regfunc(call->data);
+ call->regfunc(call);
}
break;
}
ftrace_event_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ struct trace_parser parser;
size_t read = 0;
- int i, set = 1;
ssize_t ret;
- char *buf;
- char ch;
if (!cnt || cnt < 0)
return 0;
if (ret < 0)
return ret;
- ret = get_user(ch, ubuf++);
- if (ret)
- return ret;
- read++;
- cnt--;
-
- /* skip white space */
- while (cnt && isspace(ch)) {
- ret = get_user(ch, ubuf++);
- if (ret)
- return ret;
- read++;
- cnt--;
- }
-
- /* Only white space found? */
- if (isspace(ch)) {
- file->f_pos += read;
- ret = read;
- return ret;
- }
-
- buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
- if (!buf)
+ if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
return -ENOMEM;
- if (cnt > EVENT_BUF_SIZE)
- cnt = EVENT_BUF_SIZE;
+ read = trace_get_user(&parser, ubuf, cnt, ppos);
+
+ if (trace_parser_loaded((&parser))) {
+ int set = 1;
- i = 0;
- while (cnt && !isspace(ch)) {
- if (!i && ch == '!')
+ if (*parser.buffer == '!')
set = 0;
- else
- buf[i++] = ch;
- ret = get_user(ch, ubuf++);
+ parser.buffer[parser.idx] = 0;
+
+ ret = ftrace_set_clr_event(parser.buffer + !set, set);
if (ret)
- goto out_free;
- read++;
- cnt--;
+ goto out_put;
}
- buf[i] = 0;
-
- file->f_pos += read;
-
- ret = ftrace_set_clr_event(buf, set);
- if (ret)
- goto out_free;
ret = read;
- out_free:
- kfree(buf);
+ out_put:
+ trace_parser_put(&parser);
return ret;
}
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct list_head *list = m->private;
- struct ftrace_event_call *call;
+ struct ftrace_event_call *call = v;
(*pos)++;
- for (;;) {
- if (list == &ftrace_events)
- return NULL;
-
- call = list_entry(list, struct ftrace_event_call, list);
-
+ list_for_each_entry_continue(call, &ftrace_events, list) {
/*
* The ftrace subsystem is for showing formats only.
* They can not be enabled or disabled via the event files.
*/
if (call->regfunc)
- break;
-
- list = list->next;
+ return call;
}
- m->private = list->next;
-
- return call;
+ return NULL;
}
static void *t_start(struct seq_file *m, loff_t *pos)
{
- struct ftrace_event_call *call = NULL;
+ struct ftrace_event_call *call;
loff_t l;
mutex_lock(&event_mutex);
- m->private = ftrace_events.next;
+ call = list_entry(&ftrace_events, struct ftrace_event_call, list);
for (l = 0; l <= *pos; ) {
- call = t_next(m, NULL, &l);
+ call = t_next(m, call, &l);
if (!call)
break;
}
static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct list_head *list = m->private;
- struct ftrace_event_call *call;
+ struct ftrace_event_call *call = v;
(*pos)++;
- retry:
- if (list == &ftrace_events)
- return NULL;
-
- call = list_entry(list, struct ftrace_event_call, list);
-
- if (!call->enabled) {
- list = list->next;
- goto retry;
+ list_for_each_entry_continue(call, &ftrace_events, list) {
+ if (call->enabled)
+ return call;
}
- m->private = list->next;
-
- return call;
+ return NULL;
}
static void *s_start(struct seq_file *m, loff_t *pos)
{
- struct ftrace_event_call *call = NULL;
+ struct ftrace_event_call *call;
loff_t l;
mutex_lock(&event_mutex);
- m->private = ftrace_events.next;
+ call = list_entry(&ftrace_events, struct ftrace_event_call, list);
for (l = 0; l <= *pos; ) {
- call = s_next(m, NULL, &l);
+ call = s_next(m, call, &l);
if (!call)
break;
}
FIELD(unsigned char, flags),
FIELD(unsigned char, preempt_count),
FIELD(int, pid),
- FIELD(int, tgid));
+ FIELD(int, lock_depth));
}
static ssize_t
return 0;
}
-#define for_each_event(event, start, end) \
- for (event = start; \
- (unsigned long)event < (unsigned long)end; \
- event++)
+static int __trace_add_event_call(struct ftrace_event_call *call)
+{
+ struct dentry *d_events;
+ int ret;
-#ifdef CONFIG_MODULES
+ if (!call->name)
+ return -EINVAL;
-static LIST_HEAD(ftrace_module_file_list);
+ if (call->raw_init) {
+ ret = call->raw_init(call);
+ if (ret < 0) {
+ if (ret != -ENOSYS)
+ pr_warning("Could not initialize trace "
+ "events/%s\n", call->name);
+ return ret;
+ }
+ }
-/*
- * Modules must own their file_operations to keep up with
- * reference counting.
- */
-struct ftrace_module_file_ops {
- struct list_head list;
- struct module *mod;
- struct file_operations id;
- struct file_operations enable;
- struct file_operations format;
- struct file_operations filter;
-};
+ d_events = event_trace_events_dir();
+ if (!d_events)
+ return -ENOENT;
+
+ list_add(&call->list, &ftrace_events);
+ ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
+ &ftrace_enable_fops, &ftrace_event_filter_fops,
+ &ftrace_event_format_fops);
+ if (ret < 0)
+ list_del(&call->list);
+ return ret;
+}
+
+/* Add an additional event_call dynamically */
+int trace_add_event_call(struct ftrace_event_call *call)
+{
+ int ret;
+ mutex_lock(&event_mutex);
+ ret = __trace_add_event_call(call);
+ mutex_unlock(&event_mutex);
+ return ret;
+}
static void remove_subsystem_dir(const char *name)
{
}
}
+/*
+ * Must be called under locking both of event_mutex and trace_event_mutex.
+ */
+static void __trace_remove_event_call(struct ftrace_event_call *call)
+{
+ ftrace_event_enable_disable(call, 0);
+ if (call->event)
+ __unregister_ftrace_event(call->event);
+ debugfs_remove_recursive(call->dir);
+ list_del(&call->list);
+ trace_destroy_fields(call);
+ destroy_preds(call);
+ remove_subsystem_dir(call->system);
+}
+
+/* Remove an event_call */
+void trace_remove_event_call(struct ftrace_event_call *call)
+{
+ mutex_lock(&event_mutex);
+ down_write(&trace_event_mutex);
+ __trace_remove_event_call(call);
+ up_write(&trace_event_mutex);
+ mutex_unlock(&event_mutex);
+}
+
+#define for_each_event(event, start, end) \
+ for (event = start; \
+ (unsigned long)event < (unsigned long)end; \
+ event++)
+
+#ifdef CONFIG_MODULES
+
+static LIST_HEAD(ftrace_module_file_list);
+
+/*
+ * Modules must own their file_operations to keep up with
+ * reference counting.
+ */
+struct ftrace_module_file_ops {
+ struct list_head list;
+ struct module *mod;
+ struct file_operations id;
+ struct file_operations enable;
+ struct file_operations format;
+ struct file_operations filter;
+};
+
static struct ftrace_module_file_ops *
trace_create_file_ops(struct module *mod)
{
if (!call->name)
continue;
if (call->raw_init) {
- ret = call->raw_init();
+ ret = call->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
pr_warning("Could not initialize trace "
list_for_each_entry_safe(call, p, &ftrace_events, list) {
if (call->mod == mod) {
found = true;
- ftrace_event_enable_disable(call, 0);
- if (call->event)
- __unregister_ftrace_event(call->event);
- debugfs_remove_recursive(call->dir);
- list_del(&call->list);
- trace_destroy_fields(call);
- destroy_preds(call);
- remove_subsystem_dir(call->system);
+ __trace_remove_event_call(call);
}
}
}
#endif /* CONFIG_MODULES */
- struct notifier_block trace_module_nb = {
+ static struct notifier_block trace_module_nb = {
.notifier_call = trace_module_notify,
.priority = 0,
};
if (!call->name)
continue;
if (call->raw_init) {
- ret = call->raw_init();
+ ret = call->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
pr_warning("Could not initialize trace "
if (!call->regfunc)
continue;
+ /*
+ * Testing syscall events here is pretty useless, but
+ * we still do it if configured. But this is time consuming.
+ * What we really need is a user thread to perform the
+ * syscalls as we test.
+ */
+ #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
+ if (call->system &&
+ strcmp(call->system, "syscalls") == 0)
+ continue;
+ #endif
+
pr_info("Testing event %s: ", call->name);
/*
#ifdef CONFIG_FUNCTION_TRACER
- static DEFINE_PER_CPU(atomic_t, test_event_disable);
+ static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
static void
function_test_events_call(unsigned long ip, unsigned long parent_ip)
pc = preempt_count();
resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
- disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
+ disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
if (disabled != 1)
goto out;
trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
out:
- atomic_dec(&per_cpu(test_event_disable, cpu));
+ atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
ftrace_preempt_enable(resched);
}
#include "trace_output.h"
+ #undef TRACE_SYSTEM
+ #define TRACE_SYSTEM ftrace
- #undef TRACE_STRUCT
- #define TRACE_STRUCT(args...) args
+ /* not needed for this file */
+ #undef __field_struct
+ #define __field_struct(type, item)
- extern void __bad_type_size(void);
+ #undef __field
+ #define __field(type, item) type item;
- #undef TRACE_FIELD
- #define TRACE_FIELD(type, item, assign) \
- if (sizeof(type) != sizeof(field.item)) \
- __bad_type_size(); \
+ #undef __field_desc
+ #define __field_desc(type, container, item) type item;
+
+ #undef __array
+ #define __array(type, item, size) type item[size];
+
+ #undef __array_desc
+ #define __array_desc(type, container, item, size) type item[size];
+
+ #undef __dynamic_array
+ #define __dynamic_array(type, item) type item[];
+
+ #undef F_STRUCT
+ #define F_STRUCT(args...) args
+
+ #undef F_printk
+ #define F_printk(fmt, args...) fmt, args
+
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
+ struct ____ftrace_##name { \
+ tstruct \
+ }; \
+ static void __used ____ftrace_check_##name(void) \
+ { \
+ struct ____ftrace_##name *__entry = NULL; \
+ \
+ /* force cmpile-time check on F_printk() */ \
+ printk(print); \
+ }
+
+ #undef FTRACE_ENTRY_DUP
+ #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
+ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+
+ #include "trace_entries.h"
+
+
+ #undef __field
+ #define __field(type, item) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%u;\tsize:%u;\n", \
- (unsigned int)offsetof(typeof(field), item), \
- (unsigned int)sizeof(field.item)); \
+ "offset:%zu;\tsize:%zu;\n", \
+ offsetof(typeof(field), item), \
+ sizeof(field.item)); \
if (!ret) \
return 0;
+ #undef __field_desc
+ #define __field_desc(type, container, item) \
+ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
+ "offset:%zu;\tsize:%zu;\n", \
+ offsetof(typeof(field), container.item), \
+ sizeof(field.container.item)); \
+ if (!ret) \
+ return 0;
- #undef TRACE_FIELD_SPECIAL
- #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
- ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \
- "offset:%u;\tsize:%u;\n", \
- (unsigned int)offsetof(typeof(field), item), \
- (unsigned int)sizeof(field.item)); \
+ #undef __array
+ #define __array(type, item, len) \
+ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
+ "offset:%zu;\tsize:%zu;\n", \
+ offsetof(typeof(field), item), \
+ sizeof(field.item)); \
if (!ret) \
return 0;
- #undef TRACE_FIELD_ZERO
- #define TRACE_FIELD_ZERO(type, item) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%u;\tsize:0;\n", \
- (unsigned int)offsetof(typeof(field), item)); \
+ #undef __array_desc
+ #define __array_desc(type, container, item, len) \
+ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
+ "offset:%zu;\tsize:%zu;\n", \
+ offsetof(typeof(field), container.item), \
+ sizeof(field.container.item)); \
if (!ret) \
return 0;
- #undef TRACE_FIELD_SIGN
- #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
- TRACE_FIELD(type, item, assign)
+ #undef __dynamic_array
+ #define __dynamic_array(type, item) \
+ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
+ "offset:%zu;\tsize:0;\n", \
+ offsetof(typeof(field), item)); \
+ if (!ret) \
+ return 0;
- #undef TP_RAW_FMT
- #define TP_RAW_FMT(args...) args
+ #undef F_printk
+ #define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
- static int \
- ftrace_format_##call(struct ftrace_event_call *unused, \
- struct trace_seq *s) \
- { \
- struct args field; \
- int ret; \
- \
- tstruct; \
- \
- trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
- \
- return ret; \
- }
+ #undef __entry
+ #define __entry REC
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
- tpfmt) \
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
static int \
- ftrace_format_##call(struct ftrace_event_call *unused, \
- struct trace_seq *s) \
+ ftrace_format_##name(struct ftrace_event_call *unused, \
+ struct trace_seq *s) \
{ \
- struct args field; \
- int ret; \
+ struct struct_name field __attribute__((unused)); \
+ int ret = 0; \
\
tstruct; \
\
- trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
+ trace_seq_printf(s, "\nprint fmt: " print); \
\
return ret; \
}
- #include "trace_event_types.h"
-
- #undef TRACE_FIELD
- #define TRACE_FIELD(type, item, assign)\
- entry->item = assign;
-
- #undef TRACE_FIELD
- #define TRACE_FIELD(type, item, assign)\
- entry->item = assign;
-
- #undef TRACE_FIELD_SIGN
- #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
- TRACE_FIELD(type, item, assign)
-
- #undef TRACE_FIELD_ZERO
- #define TRACE_FIELD_ZERO(type, item)
-
- #undef TP_CMD
- #define TP_CMD(cmd...) cmd
-
- #undef TRACE_ENTRY
- #define TRACE_ENTRY entry
-
- #undef TRACE_FIELD_SPECIAL
- #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
- cmd;
-
- static int ftrace_raw_init_event(struct ftrace_event_call *event_call)
- {
- INIT_LIST_HEAD(&event_call->fields);
-
- return 0;
- }
-
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
- int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \
- \
- struct ftrace_event_call __used \
- __attribute__((__aligned__(4))) \
- __attribute__((section("_ftrace_events"))) event_##call = { \
- .name = #call, \
- .id = proto, \
- .system = __stringify(TRACE_SYSTEM), \
- .raw_init = ftrace_raw_init_event, \
- .show_format = ftrace_format_##call, \
- .define_fields = ftrace_define_fields_##call, \
- }; \
-
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
- tpfmt) \
- \
- struct ftrace_event_call __used \
- __attribute__((__aligned__(4))) \
- __attribute__((section("_ftrace_events"))) event_##call = { \
- .name = #call, \
- .id = proto, \
- .system = __stringify(TRACE_SYSTEM), \
- .show_format = ftrace_format_##call, \
- };
-
- #include "trace_event_types.h"
+ #include "trace_entries.h"
- #undef TRACE_FIELD
- #define TRACE_FIELD(type, item, assign) \
-
+ #undef __field
+ #define __field(type, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
if (ret) \
return ret;
- #undef TRACE_FIELD_SPECIAL
- #define TRACE_FIELD_SPECIAL(type, item, len, cmd) \
+ #undef __field_desc
+ #define __field_desc(type, container, item) \
+ ret = trace_define_field(event_call, #type, #item, \
+ offsetof(typeof(field), \
+ container.item), \
+ sizeof(field.container.item), \
+ is_signed_type(type), FILTER_OTHER); \
+ if (ret) \
+ return ret;
+
+ #undef __array
+ #define __array(type, item, len) \
+ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
offsetof(typeof(field), item), \
sizeof(field.item), 0, FILTER_OTHER); \
if (ret) \
return ret;
- #undef TRACE_FIELD_SIGN
- #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
- ret = trace_define_field(event_call, #type, #item, \
- offsetof(typeof(field), item), \
- sizeof(field.item), is_signed, \
+ #undef __array_desc
+ #define __array_desc(type, container, item, len) \
+ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
+ ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+ offsetof(typeof(field), \
+ container.item), \
+ sizeof(field.container.item), 0, \
FILTER_OTHER); \
if (ret) \
return ret;
- #undef TRACE_FIELD_ZERO
- #define TRACE_FIELD_ZERO(type, item)
+ #undef __dynamic_array
+ #define __dynamic_array(type, item)
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
int \
- ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
+ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
{ \
- struct args field; \
+ struct struct_name field; \
int ret; \
\
ret = trace_define_common_fields(event_call); \
return ret; \
}
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
- tpfmt)
+ #include "trace_entries.h"
+
++static int ftrace_raw_init_event(struct ftrace_event_call *call)
++{
++ INIT_LIST_HEAD(&call->fields);
++ return 0;
++}
+
+ #undef __field
+ #define __field(type, item)
+
+ #undef __field_desc
+ #define __field_desc(type, container, item)
+
+ #undef __array
+ #define __array(type, item, len)
+
+ #undef __array_desc
+ #define __array_desc(type, container, item, len)
+
+ #undef __dynamic_array
+ #define __dynamic_array(type, item)
+
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
-static int ftrace_raw_init_event_##call(void); \
+ \
+ struct ftrace_event_call __used \
+ __attribute__((__aligned__(4))) \
+ __attribute__((section("_ftrace_events"))) event_##call = { \
+ .name = #call, \
+ .id = type, \
+ .system = __stringify(TRACE_SYSTEM), \
- .raw_init = ftrace_raw_init_event_##call, \
++ .raw_init = ftrace_raw_init_event, \
+ .show_format = ftrace_format_##call, \
+ .define_fields = ftrace_define_fields_##call, \
+ }; \
-static int ftrace_raw_init_event_##call(void) \
-{ \
- INIT_LIST_HEAD(&event_##call.fields); \
- return 0; \
-} \
- #include "trace_event_types.h"
+ #include "trace_entries.h"
--- /dev/null
- #include <linux/perf_counter.h>
+/*
+ * kprobe based kernel tracer
+ *
+ * Created by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/kprobes.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/debugfs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/ptrace.h>
- perf_tpcounter_event(call->id, entry->ip, 1, entry, size);
++#include <linux/perf_event.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+#define MAX_TRACE_ARGS 128
+#define MAX_ARGSTR_LEN 63
+#define MAX_EVENT_NAME_LEN 64
+#define KPROBE_EVENT_SYSTEM "kprobes"
+
+/* currently, trace_kprobe only supports X86. */
+
+struct fetch_func {
+ unsigned long (*func)(struct pt_regs *, void *);
+ void *data;
+};
+
+static __kprobes unsigned long call_fetch(struct fetch_func *f,
+ struct pt_regs *regs)
+{
+ return f->func(regs, f->data);
+}
+
+/* fetch handlers */
+static __kprobes unsigned long fetch_register(struct pt_regs *regs,
+ void *offset)
+{
+ return regs_get_register(regs, (unsigned int)((unsigned long)offset));
+}
+
+static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
+ void *num)
+{
+ return regs_get_kernel_stack_nth(regs,
+ (unsigned int)((unsigned long)num));
+}
+
+static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
+{
+ unsigned long retval;
+
+ if (probe_kernel_address(addr, retval))
+ return 0;
+ return retval;
+}
+
+static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
+{
+ return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
+}
+
+static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
+ void *dummy)
+{
+ return regs_return_value(regs);
+}
+
+static __kprobes unsigned long fetch_ip(struct pt_regs *regs, void *dummy)
+{
+ return instruction_pointer(regs);
+}
+
+static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
+ void *dummy)
+{
+ return kernel_stack_pointer(regs);
+}
+
+/* Memory fetching by symbol */
+struct symbol_cache {
+ char *symbol;
+ long offset;
+ unsigned long addr;
+};
+
+static unsigned long update_symbol_cache(struct symbol_cache *sc)
+{
+ sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
+ if (sc->addr)
+ sc->addr += sc->offset;
+ return sc->addr;
+}
+
+static void free_symbol_cache(struct symbol_cache *sc)
+{
+ kfree(sc->symbol);
+ kfree(sc);
+}
+
+static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
+{
+ struct symbol_cache *sc;
+
+ if (!sym || strlen(sym) == 0)
+ return NULL;
+ sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
+ if (!sc)
+ return NULL;
+
+ sc->symbol = kstrdup(sym, GFP_KERNEL);
+ if (!sc->symbol) {
+ kfree(sc);
+ return NULL;
+ }
+ sc->offset = offset;
+
+ update_symbol_cache(sc);
+ return sc;
+}
+
+static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
+{
+ struct symbol_cache *sc = data;
+
+ if (sc->addr)
+ return fetch_memory(regs, (void *)sc->addr);
+ else
+ return 0;
+}
+
+/* Special indirect memory access interface */
+struct indirect_fetch_data {
+ struct fetch_func orig;
+ long offset;
+};
+
+static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
+{
+ struct indirect_fetch_data *ind = data;
+ unsigned long addr;
+
+ addr = call_fetch(&ind->orig, regs);
+ if (addr) {
+ addr += ind->offset;
+ return fetch_memory(regs, (void *)addr);
+ } else
+ return 0;
+}
+
+static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
+{
+ if (data->orig.func == fetch_indirect)
+ free_indirect_fetch_data(data->orig.data);
+ else if (data->orig.func == fetch_symbol)
+ free_symbol_cache(data->orig.data);
+ kfree(data);
+}
+
+/**
+ * Kprobe tracer core functions
+ */
+
+struct probe_arg {
+ struct fetch_func fetch;
+ const char *name;
+};
+
+/* Flags for trace_probe */
+#define TP_FLAG_TRACE 1
+#define TP_FLAG_PROFILE 2
+
+struct trace_probe {
+ struct list_head list;
+ struct kretprobe rp; /* Use rp.kp for kprobe use */
+ unsigned long nhit;
+ unsigned int flags; /* For TP_FLAG_* */
+ const char *symbol; /* symbol name */
+ struct ftrace_event_call call;
+ struct trace_event event;
+ unsigned int nr_args;
+ struct probe_arg args[];
+};
+
+#define SIZEOF_TRACE_PROBE(n) \
+ (offsetof(struct trace_probe, args) + \
+ (sizeof(struct probe_arg) * (n)))
+
+static __kprobes int probe_is_return(struct trace_probe *tp)
+{
+ return tp->rp.handler != NULL;
+}
+
+static __kprobes const char *probe_symbol(struct trace_probe *tp)
+{
+ return tp->symbol ? tp->symbol : "unknown";
+}
+
+static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
+{
+ int ret = -EINVAL;
+
+ if (ff->func == fetch_argument)
+ ret = snprintf(buf, n, "a%lu", (unsigned long)ff->data);
+ else if (ff->func == fetch_register) {
+ const char *name;
+ name = regs_query_register_name((unsigned int)((long)ff->data));
+ ret = snprintf(buf, n, "%%%s", name);
+ } else if (ff->func == fetch_stack)
+ ret = snprintf(buf, n, "s%lu", (unsigned long)ff->data);
+ else if (ff->func == fetch_memory)
+ ret = snprintf(buf, n, "@0x%p", ff->data);
+ else if (ff->func == fetch_symbol) {
+ struct symbol_cache *sc = ff->data;
+ ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
+ } else if (ff->func == fetch_retvalue)
+ ret = snprintf(buf, n, "rv");
+ else if (ff->func == fetch_ip)
+ ret = snprintf(buf, n, "ra");
+ else if (ff->func == fetch_stack_address)
+ ret = snprintf(buf, n, "sa");
+ else if (ff->func == fetch_indirect) {
+ struct indirect_fetch_data *id = ff->data;
+ size_t l = 0;
+ ret = snprintf(buf, n, "%+ld(", id->offset);
+ if (ret >= n)
+ goto end;
+ l += ret;
+ ret = probe_arg_string(buf + l, n - l, &id->orig);
+ if (ret < 0)
+ goto end;
+ l += ret;
+ ret = snprintf(buf + l, n - l, ")");
+ ret += l;
+ }
+end:
+ if (ret >= n)
+ return -ENOSPC;
+ return ret;
+}
+
+static int register_probe_event(struct trace_probe *tp);
+static void unregister_probe_event(struct trace_probe *tp);
+
+static DEFINE_MUTEX(probe_lock);
+static LIST_HEAD(probe_list);
+
+static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
+static int kretprobe_dispatcher(struct kretprobe_instance *ri,
+ struct pt_regs *regs);
+
+/*
+ * Allocate new trace_probe and initialize it (including kprobes).
+ */
+static struct trace_probe *alloc_trace_probe(const char *group,
+ const char *event,
+ void *addr,
+ const char *symbol,
+ unsigned long offs,
+ int nargs, int is_return)
+{
+ struct trace_probe *tp;
+
+ tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
+ if (!tp)
+ return ERR_PTR(-ENOMEM);
+
+ if (symbol) {
+ tp->symbol = kstrdup(symbol, GFP_KERNEL);
+ if (!tp->symbol)
+ goto error;
+ tp->rp.kp.symbol_name = tp->symbol;
+ tp->rp.kp.offset = offs;
+ } else
+ tp->rp.kp.addr = addr;
+
+ if (is_return)
+ tp->rp.handler = kretprobe_dispatcher;
+ else
+ tp->rp.kp.pre_handler = kprobe_dispatcher;
+
+ if (!event)
+ goto error;
+ tp->call.name = kstrdup(event, GFP_KERNEL);
+ if (!tp->call.name)
+ goto error;
+
+ if (!group)
+ goto error;
+ tp->call.system = kstrdup(group, GFP_KERNEL);
+ if (!tp->call.system)
+ goto error;
+
+ INIT_LIST_HEAD(&tp->list);
+ return tp;
+error:
+ kfree(tp->call.name);
+ kfree(tp->symbol);
+ kfree(tp);
+ return ERR_PTR(-ENOMEM);
+}
+
+static void free_probe_arg(struct probe_arg *arg)
+{
+ if (arg->fetch.func == fetch_symbol)
+ free_symbol_cache(arg->fetch.data);
+ else if (arg->fetch.func == fetch_indirect)
+ free_indirect_fetch_data(arg->fetch.data);
+ kfree(arg->name);
+}
+
+static void free_trace_probe(struct trace_probe *tp)
+{
+ int i;
+
+ for (i = 0; i < tp->nr_args; i++)
+ free_probe_arg(&tp->args[i]);
+
+ kfree(tp->call.system);
+ kfree(tp->call.name);
+ kfree(tp->symbol);
+ kfree(tp);
+}
+
+static struct trace_probe *find_probe_event(const char *event)
+{
+ struct trace_probe *tp;
+
+ list_for_each_entry(tp, &probe_list, list)
+ if (!strcmp(tp->call.name, event))
+ return tp;
+ return NULL;
+}
+
+/* Unregister a trace_probe and probe_event: call with locking probe_lock */
+static void unregister_trace_probe(struct trace_probe *tp)
+{
+ if (probe_is_return(tp))
+ unregister_kretprobe(&tp->rp);
+ else
+ unregister_kprobe(&tp->rp.kp);
+ list_del(&tp->list);
+ unregister_probe_event(tp);
+}
+
+/* Register a trace_probe and probe_event */
+static int register_trace_probe(struct trace_probe *tp)
+{
+ struct trace_probe *old_tp;
+ int ret;
+
+ mutex_lock(&probe_lock);
+
+ /* register as an event */
+ old_tp = find_probe_event(tp->call.name);
+ if (old_tp) {
+ /* delete old event */
+ unregister_trace_probe(old_tp);
+ free_trace_probe(old_tp);
+ }
+ ret = register_probe_event(tp);
+ if (ret) {
+ pr_warning("Faild to register probe event(%d)\n", ret);
+ goto end;
+ }
+
+ tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
+ if (probe_is_return(tp))
+ ret = register_kretprobe(&tp->rp);
+ else
+ ret = register_kprobe(&tp->rp.kp);
+
+ if (ret) {
+ pr_warning("Could not insert probe(%d)\n", ret);
+ if (ret == -EILSEQ) {
+ pr_warning("Probing address(0x%p) is not an "
+ "instruction boundary.\n",
+ tp->rp.kp.addr);
+ ret = -EINVAL;
+ }
+ unregister_probe_event(tp);
+ } else
+ list_add_tail(&tp->list, &probe_list);
+end:
+ mutex_unlock(&probe_lock);
+ return ret;
+}
+
+/* Split symbol and offset. */
+static int split_symbol_offset(char *symbol, unsigned long *offset)
+{
+ char *tmp;
+ int ret;
+
+ if (!offset)
+ return -EINVAL;
+
+ tmp = strchr(symbol, '+');
+ if (tmp) {
+ /* skip sign because strict_strtol doesn't accept '+' */
+ ret = strict_strtoul(tmp + 1, 0, offset);
+ if (ret)
+ return ret;
+ *tmp = '\0';
+ } else
+ *offset = 0;
+ return 0;
+}
+
+#define PARAM_MAX_ARGS 16
+#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
+
+static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
+{
+ int ret = 0;
+ unsigned long param;
+ long offset;
+ char *tmp;
+
+ switch (arg[0]) {
+ case 'a': /* argument */
+ ret = strict_strtoul(arg + 1, 10, ¶m);
+ if (ret || param > PARAM_MAX_ARGS)
+ ret = -EINVAL;
+ else {
+ ff->func = fetch_argument;
+ ff->data = (void *)param;
+ }
+ break;
+ case 'r': /* retval or retaddr */
+ if (is_return && arg[1] == 'v') {
+ ff->func = fetch_retvalue;
+ ff->data = NULL;
+ } else if (is_return && arg[1] == 'a') {
+ ff->func = fetch_ip;
+ ff->data = NULL;
+ } else
+ ret = -EINVAL;
+ break;
+ case '%': /* named register */
+ ret = regs_query_register_offset(arg + 1);
+ if (ret >= 0) {
+ ff->func = fetch_register;
+ ff->data = (void *)(unsigned long)ret;
+ ret = 0;
+ }
+ break;
+ case 's': /* stack */
+ if (arg[1] == 'a') {
+ ff->func = fetch_stack_address;
+ ff->data = NULL;
+ } else {
+ ret = strict_strtoul(arg + 1, 10, ¶m);
+ if (ret || param > PARAM_MAX_STACK)
+ ret = -EINVAL;
+ else {
+ ff->func = fetch_stack;
+ ff->data = (void *)param;
+ }
+ }
+ break;
+ case '@': /* memory or symbol */
+ if (isdigit(arg[1])) {
+ ret = strict_strtoul(arg + 1, 0, ¶m);
+ if (ret)
+ break;
+ ff->func = fetch_memory;
+ ff->data = (void *)param;
+ } else {
+ ret = split_symbol_offset(arg + 1, &offset);
+ if (ret)
+ break;
+ ff->data = alloc_symbol_cache(arg + 1,
+ offset);
+ if (ff->data)
+ ff->func = fetch_symbol;
+ else
+ ret = -EINVAL;
+ }
+ break;
+ case '+': /* indirect memory */
+ case '-':
+ tmp = strchr(arg, '(');
+ if (!tmp) {
+ ret = -EINVAL;
+ break;
+ }
+ *tmp = '\0';
+ ret = strict_strtol(arg + 1, 0, &offset);
+ if (ret)
+ break;
+ if (arg[0] == '-')
+ offset = -offset;
+ arg = tmp + 1;
+ tmp = strrchr(arg, ')');
+ if (tmp) {
+ struct indirect_fetch_data *id;
+ *tmp = '\0';
+ id = kzalloc(sizeof(struct indirect_fetch_data),
+ GFP_KERNEL);
+ if (!id)
+ return -ENOMEM;
+ id->offset = offset;
+ ret = parse_probe_arg(arg, &id->orig, is_return);
+ if (ret)
+ kfree(id);
+ else {
+ ff->func = fetch_indirect;
+ ff->data = (void *)id;
+ }
+ } else
+ ret = -EINVAL;
+ break;
+ default:
+ /* TODO: support custom handler */
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
+static int create_trace_probe(int argc, char **argv)
+{
+ /*
+ * Argument syntax:
+ * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
+ * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
+ * Fetch args:
+ * aN : fetch Nth of function argument. (N:0-)
+ * rv : fetch return value
+ * ra : fetch return address
+ * sa : fetch stack address
+ * sN : fetch Nth of stack (N:0-)
+ * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
+ * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
+ * %REG : fetch register REG
+ * Indirect memory fetch:
+ * +|-offs(ARG) : fetch memory at ARG +|- offs address.
+ * Alias name of args:
+ * NAME=FETCHARG : set NAME as alias of FETCHARG.
+ */
+ struct trace_probe *tp;
+ int i, ret = 0;
+ int is_return = 0;
+ char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
+ unsigned long offset = 0;
+ void *addr = NULL;
+ char buf[MAX_EVENT_NAME_LEN];
+
+ if (argc < 2)
+ return -EINVAL;
+
+ if (argv[0][0] == 'p')
+ is_return = 0;
+ else if (argv[0][0] == 'r')
+ is_return = 1;
+ else
+ return -EINVAL;
+
+ if (argv[0][1] == ':') {
+ event = &argv[0][2];
+ if (strchr(event, '/')) {
+ group = event;
+ event = strchr(group, '/') + 1;
+ event[-1] = '\0';
+ if (strlen(group) == 0) {
+ pr_info("Group name is not specifiled\n");
+ return -EINVAL;
+ }
+ }
+ if (strlen(event) == 0) {
+ pr_info("Event name is not specifiled\n");
+ return -EINVAL;
+ }
+ }
+
+ if (isdigit(argv[1][0])) {
+ if (is_return)
+ return -EINVAL;
+ /* an address specified */
+ ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
+ if (ret)
+ return ret;
+ } else {
+ /* a symbol specified */
+ symbol = argv[1];
+ /* TODO: support .init module functions */
+ ret = split_symbol_offset(symbol, &offset);
+ if (ret)
+ return ret;
+ if (offset && is_return)
+ return -EINVAL;
+ }
+ argc -= 2; argv += 2;
+
+ /* setup a probe */
+ if (!group)
+ group = KPROBE_EVENT_SYSTEM;
+ if (!event) {
+ /* Make a new event name */
+ if (symbol)
+ snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
+ is_return ? 'r' : 'p', symbol, offset);
+ else
+ snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
+ is_return ? 'r' : 'p', addr);
+ event = buf;
+ }
+ tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
+ is_return);
+ if (IS_ERR(tp))
+ return PTR_ERR(tp);
+
+ /* parse arguments */
+ ret = 0;
+ for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ /* Parse argument name */
+ arg = strchr(argv[i], '=');
+ if (arg)
+ *arg++ = '\0';
+ else
+ arg = argv[i];
+ tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+
+ /* Parse fetch argument */
+ if (strlen(arg) > MAX_ARGSTR_LEN) {
+ pr_info("Argument%d(%s) is too long.\n", i, arg);
+ ret = -ENOSPC;
+ goto error;
+ }
+ ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
+ if (ret)
+ goto error;
+ }
+ tp->nr_args = i;
+
+ ret = register_trace_probe(tp);
+ if (ret)
+ goto error;
+ return 0;
+
+error:
+ free_trace_probe(tp);
+ return ret;
+}
+
+static void cleanup_all_probes(void)
+{
+ struct trace_probe *tp;
+
+ mutex_lock(&probe_lock);
+ /* TODO: Use batch unregistration */
+ while (!list_empty(&probe_list)) {
+ tp = list_entry(probe_list.next, struct trace_probe, list);
+ unregister_trace_probe(tp);
+ free_trace_probe(tp);
+ }
+ mutex_unlock(&probe_lock);
+}
+
+
+/* Probes listing interfaces */
+static void *probes_seq_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&probe_lock);
+ return seq_list_start(&probe_list, *pos);
+}
+
+static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return seq_list_next(v, &probe_list, pos);
+}
+
+static void probes_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&probe_lock);
+}
+
+static int probes_seq_show(struct seq_file *m, void *v)
+{
+ struct trace_probe *tp = v;
+ int i, ret;
+ char buf[MAX_ARGSTR_LEN + 1];
+
+ seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
+ seq_printf(m, ":%s", tp->call.name);
+
+ if (tp->symbol)
+ seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
+ else
+ seq_printf(m, " 0x%p", tp->rp.kp.addr);
+
+ for (i = 0; i < tp->nr_args; i++) {
+ ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
+ if (ret < 0) {
+ pr_warning("Argument%d decoding error(%d).\n", i, ret);
+ return ret;
+ }
+ seq_printf(m, " %s=%s", tp->args[i].name, buf);
+ }
+ seq_printf(m, "\n");
+ return 0;
+}
+
+static const struct seq_operations probes_seq_op = {
+ .start = probes_seq_start,
+ .next = probes_seq_next,
+ .stop = probes_seq_stop,
+ .show = probes_seq_show
+};
+
+static int probes_open(struct inode *inode, struct file *file)
+{
+ if ((file->f_mode & FMODE_WRITE) &&
+ (file->f_flags & O_TRUNC))
+ cleanup_all_probes();
+
+ return seq_open(file, &probes_seq_op);
+}
+
+static int command_trace_probe(const char *buf)
+{
+ char **argv;
+ int argc = 0, ret = 0;
+
+ argv = argv_split(GFP_KERNEL, buf, &argc);
+ if (!argv)
+ return -ENOMEM;
+
+ if (argc)
+ ret = create_trace_probe(argc, argv);
+
+ argv_free(argv);
+ return ret;
+}
+
+#define WRITE_BUFSIZE 128
+
+static ssize_t probes_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ char *kbuf, *tmp;
+ int ret;
+ size_t done;
+ size_t size;
+
+ kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ ret = done = 0;
+ while (done < count) {
+ size = count - done;
+ if (size >= WRITE_BUFSIZE)
+ size = WRITE_BUFSIZE - 1;
+ if (copy_from_user(kbuf, buffer + done, size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ kbuf[size] = '\0';
+ tmp = strchr(kbuf, '\n');
+ if (tmp) {
+ *tmp = '\0';
+ size = tmp - kbuf + 1;
+ } else if (done + size < count) {
+ pr_warning("Line length is too long: "
+ "Should be less than %d.", WRITE_BUFSIZE);
+ ret = -EINVAL;
+ goto out;
+ }
+ done += size;
+ /* Remove comments */
+ tmp = strchr(kbuf, '#');
+ if (tmp)
+ *tmp = '\0';
+
+ ret = command_trace_probe(kbuf);
+ if (ret)
+ goto out;
+ }
+ ret = done;
+out:
+ kfree(kbuf);
+ return ret;
+}
+
+static const struct file_operations kprobe_events_ops = {
+ .owner = THIS_MODULE,
+ .open = probes_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .write = probes_write,
+};
+
+/* Probes profiling interfaces */
+static int probes_profile_seq_show(struct seq_file *m, void *v)
+{
+ struct trace_probe *tp = v;
+
+ seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
+ tp->rp.kp.nmissed);
+
+ return 0;
+}
+
+static const struct seq_operations profile_seq_op = {
+ .start = probes_seq_start,
+ .next = probes_seq_next,
+ .stop = probes_seq_stop,
+ .show = probes_profile_seq_show
+};
+
+static int profile_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &profile_seq_op);
+}
+
+static const struct file_operations kprobe_profile_ops = {
+ .owner = THIS_MODULE,
+ .open = profile_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/* Kprobe handler */
+static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+ struct kprobe_trace_entry *entry;
+ struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
+ int size, i, pc;
+ unsigned long irq_flags;
+ struct ftrace_event_call *call = &tp->call;
+
+ tp->nhit++;
+
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
+ size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+
+ event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
+ irq_flags, pc);
+ if (!event)
+ return 0;
+
+ entry = ring_buffer_event_data(event);
+ entry->nargs = tp->nr_args;
+ entry->ip = (unsigned long)kp->addr;
+ for (i = 0; i < tp->nr_args; i++)
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
+
+ if (!filter_current_check_discard(buffer, call, entry, event))
+ trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+ return 0;
+}
+
+/* Kretprobe handler */
+static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+ struct kretprobe_trace_entry *entry;
+ struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
+ int size, i, pc;
+ unsigned long irq_flags;
+ struct ftrace_event_call *call = &tp->call;
+
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
+ size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+
+ event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
+ irq_flags, pc);
+ if (!event)
+ return 0;
+
+ entry = ring_buffer_event_data(event);
+ entry->nargs = tp->nr_args;
+ entry->func = (unsigned long)tp->rp.kp.addr;
+ entry->ret_ip = (unsigned long)ri->ret_addr;
+ for (i = 0; i < tp->nr_args; i++)
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
+
+ if (!filter_current_check_discard(buffer, call, entry, event))
+ trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+
+ return 0;
+}
+
+/* Event entry printers */
+enum print_line_t
+print_kprobe_event(struct trace_iterator *iter, int flags)
+{
+ struct kprobe_trace_entry *field;
+ struct trace_seq *s = &iter->seq;
+ struct trace_event *event;
+ struct trace_probe *tp;
+ int i;
+
+ field = (struct kprobe_trace_entry *)iter->ent;
+ event = ftrace_find_event(field->ent.type);
+ tp = container_of(event, struct trace_probe, event);
+
+ if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ goto partial;
+
+ if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+ goto partial;
+
+ if (!trace_seq_puts(s, ")"))
+ goto partial;
+
+ for (i = 0; i < field->nargs; i++)
+ if (!trace_seq_printf(s, " %s=%lx",
+ tp->args[i].name, field->args[i]))
+ goto partial;
+
+ if (!trace_seq_puts(s, "\n"))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+enum print_line_t
+print_kretprobe_event(struct trace_iterator *iter, int flags)
+{
+ struct kretprobe_trace_entry *field;
+ struct trace_seq *s = &iter->seq;
+ struct trace_event *event;
+ struct trace_probe *tp;
+ int i;
+
+ field = (struct kretprobe_trace_entry *)iter->ent;
+ event = ftrace_find_event(field->ent.type);
+ tp = container_of(event, struct trace_probe, event);
+
+ if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ goto partial;
+
+ if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
+ goto partial;
+
+ if (!trace_seq_puts(s, " <- "))
+ goto partial;
+
+ if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
+ goto partial;
+
+ if (!trace_seq_puts(s, ")"))
+ goto partial;
+
+ for (i = 0; i < field->nargs; i++)
+ if (!trace_seq_printf(s, " %s=%lx",
+ tp->args[i].name, field->args[i]))
+ goto partial;
+
+ if (!trace_seq_puts(s, "\n"))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int probe_event_enable(struct ftrace_event_call *call)
+{
+ struct trace_probe *tp = (struct trace_probe *)call->data;
+
+ tp->flags |= TP_FLAG_TRACE;
+ if (probe_is_return(tp))
+ return enable_kretprobe(&tp->rp);
+ else
+ return enable_kprobe(&tp->rp.kp);
+}
+
+static void probe_event_disable(struct ftrace_event_call *call)
+{
+ struct trace_probe *tp = (struct trace_probe *)call->data;
+
+ tp->flags &= ~TP_FLAG_TRACE;
+ if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
+ if (probe_is_return(tp))
+ disable_kretprobe(&tp->rp);
+ else
+ disable_kprobe(&tp->rp.kp);
+ }
+}
+
+static int probe_event_raw_init(struct ftrace_event_call *event_call)
+{
+ INIT_LIST_HEAD(&event_call->fields);
+
+ return 0;
+}
+
+#undef DEFINE_FIELD
+#define DEFINE_FIELD(type, item, name, is_signed) \
+ do { \
+ ret = trace_define_field(event_call, #type, name, \
+ offsetof(typeof(field), item), \
+ sizeof(field.item), is_signed, \
+ FILTER_OTHER); \
+ if (ret) \
+ return ret; \
+ } while (0)
+
+static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
+{
+ int ret, i;
+ struct kprobe_trace_entry field;
+ struct trace_probe *tp = (struct trace_probe *)event_call->data;
+
+ ret = trace_define_common_fields(event_call);
+ if (!ret)
+ return ret;
+
+ DEFINE_FIELD(unsigned long, ip, "ip", 0);
+ DEFINE_FIELD(int, nargs, "nargs", 1);
+ /* Set argument names as fields */
+ for (i = 0; i < tp->nr_args; i++)
+ DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
+ return 0;
+}
+
+static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
+{
+ int ret, i;
+ struct kretprobe_trace_entry field;
+ struct trace_probe *tp = (struct trace_probe *)event_call->data;
+
+ ret = trace_define_common_fields(event_call);
+ if (!ret)
+ return ret;
+
+ DEFINE_FIELD(unsigned long, func, "func", 0);
+ DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
+ DEFINE_FIELD(int, nargs, "nargs", 1);
+ /* Set argument names as fields */
+ for (i = 0; i < tp->nr_args; i++)
+ DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
+ return 0;
+}
+
+static int __probe_event_show_format(struct trace_seq *s,
+ struct trace_probe *tp, const char *fmt,
+ const char *arg)
+{
+ int i;
+
+ /* Show format */
+ if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
+ return 0;
+
+ for (i = 0; i < tp->nr_args; i++)
+ if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
+ return 0;
+
+ if (!trace_seq_printf(s, "\", %s", arg))
+ return 0;
+
+ for (i = 0; i < tp->nr_args; i++)
+ if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
+ return 0;
+
+ return trace_seq_puts(s, "\n");
+}
+
+#undef SHOW_FIELD
+#define SHOW_FIELD(type, item, name) \
+ do { \
+ ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
+ "offset:%u;\tsize:%u;\n", name, \
+ (unsigned int)offsetof(typeof(field), item),\
+ (unsigned int)sizeof(type)); \
+ if (!ret) \
+ return 0; \
+ } while (0)
+
+static int kprobe_event_show_format(struct ftrace_event_call *call,
+ struct trace_seq *s)
+{
+ struct kprobe_trace_entry field __attribute__((unused));
+ int ret, i;
+ struct trace_probe *tp = (struct trace_probe *)call->data;
+
+ SHOW_FIELD(unsigned long, ip, "ip");
+ SHOW_FIELD(int, nargs, "nargs");
+
+ /* Show fields */
+ for (i = 0; i < tp->nr_args; i++)
+ SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
+ trace_seq_puts(s, "\n");
+
+ return __probe_event_show_format(s, tp, "(%lx)", "REC->ip");
+}
+
+static int kretprobe_event_show_format(struct ftrace_event_call *call,
+ struct trace_seq *s)
+{
+ struct kretprobe_trace_entry field __attribute__((unused));
+ int ret, i;
+ struct trace_probe *tp = (struct trace_probe *)call->data;
+
+ SHOW_FIELD(unsigned long, func, "func");
+ SHOW_FIELD(unsigned long, ret_ip, "ret_ip");
+ SHOW_FIELD(int, nargs, "nargs");
+
+ /* Show fields */
+ for (i = 0; i < tp->nr_args; i++)
+ SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
+ trace_seq_puts(s, "\n");
+
+ return __probe_event_show_format(s, tp, "(%lx <- %lx)",
+ "REC->func, REC->ret_ip");
+}
+
+#ifdef CONFIG_EVENT_PROFILE
+
+/* Kprobe profile handler */
+static __kprobes int kprobe_profile_func(struct kprobe *kp,
+ struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+ struct ftrace_event_call *call = &tp->call;
+ struct kprobe_trace_entry *entry;
+ int size, __size, i, pc;
+ unsigned long irq_flags;
+
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
+ __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+ size = ALIGN(__size + sizeof(u32), sizeof(u64));
+ size -= sizeof(u32);
+
+ do {
+ char raw_data[size];
+ struct trace_entry *ent;
+ /*
+ * Zero dead bytes from alignment to avoid stack leak
+ * to userspace
+ */
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ entry = (struct kprobe_trace_entry *)raw_data;
+ ent = &entry->ent;
+
+ tracing_generic_entry_update(ent, irq_flags, pc);
+ ent->type = call->id;
+ entry->nargs = tp->nr_args;
+ entry->ip = (unsigned long)kp->addr;
+ for (i = 0; i < tp->nr_args; i++)
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
- perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size);
++ perf_tp_event(call->id, entry->ip, 1, entry, size);
+ } while (0);
+ return 0;
+}
+
+/* Kretprobe profile handler */
+static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+ struct ftrace_event_call *call = &tp->call;
+ struct kretprobe_trace_entry *entry;
+ int size, __size, i, pc;
+ unsigned long irq_flags;
+
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
+ __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+ size = ALIGN(__size + sizeof(u32), sizeof(u64));
+ size -= sizeof(u32);
+
+ do {
+ char raw_data[size];
+ struct trace_entry *ent;
+
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ entry = (struct kretprobe_trace_entry *)raw_data;
+ ent = &entry->ent;
+
+ tracing_generic_entry_update(ent, irq_flags, pc);
+ ent->type = call->id;
+ entry->nargs = tp->nr_args;
+ entry->func = (unsigned long)tp->rp.kp.addr;
+ entry->ret_ip = (unsigned long)ri->ret_addr;
+ for (i = 0; i < tp->nr_args; i++)
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
- if (atomic_inc_return(&call->profile_count))
- return 0;
-
++ perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
+ } while (0);
+ return 0;
+}
+
+static int probe_profile_enable(struct ftrace_event_call *call)
+{
+ struct trace_probe *tp = (struct trace_probe *)call->data;
+
- if (atomic_add_negative(-1, &call->profile_count))
- tp->flags &= ~TP_FLAG_PROFILE;
+ tp->flags |= TP_FLAG_PROFILE;
++
+ if (probe_is_return(tp))
+ return enable_kretprobe(&tp->rp);
+ else
+ return enable_kprobe(&tp->rp.kp);
+}
+
+static void probe_profile_disable(struct ftrace_event_call *call)
+{
+ struct trace_probe *tp = (struct trace_probe *)call->data;
+
- if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
++ tp->flags &= ~TP_FLAG_PROFILE;
+
++ if (!(tp->flags & TP_FLAG_TRACE)) {
+ if (probe_is_return(tp))
+ disable_kretprobe(&tp->rp);
+ else
+ disable_kprobe(&tp->rp.kp);
+ }
+}
+#endif /* CONFIG_EVENT_PROFILE */
+
+
+static __kprobes
+int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+
+ if (tp->flags & TP_FLAG_TRACE)
+ kprobe_trace_func(kp, regs);
+#ifdef CONFIG_EVENT_PROFILE
+ if (tp->flags & TP_FLAG_PROFILE)
+ kprobe_profile_func(kp, regs);
+#endif /* CONFIG_EVENT_PROFILE */
+ return 0; /* We don't tweek kernel, so just return 0 */
+}
+
+static __kprobes
+int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+
+ if (tp->flags & TP_FLAG_TRACE)
+ kretprobe_trace_func(ri, regs);
+#ifdef CONFIG_EVENT_PROFILE
+ if (tp->flags & TP_FLAG_PROFILE)
+ kretprobe_profile_func(ri, regs);
+#endif /* CONFIG_EVENT_PROFILE */
+ return 0; /* We don't tweek kernel, so just return 0 */
+}
+
+static int register_probe_event(struct trace_probe *tp)
+{
+ struct ftrace_event_call *call = &tp->call;
+ int ret;
+
+ /* Initialize ftrace_event_call */
+ if (probe_is_return(tp)) {
+ tp->event.trace = print_kretprobe_event;
+ call->raw_init = probe_event_raw_init;
+ call->show_format = kretprobe_event_show_format;
+ call->define_fields = kretprobe_event_define_fields;
+ } else {
+ tp->event.trace = print_kprobe_event;
+ call->raw_init = probe_event_raw_init;
+ call->show_format = kprobe_event_show_format;
+ call->define_fields = kprobe_event_define_fields;
+ }
+ call->event = &tp->event;
+ call->id = register_ftrace_event(&tp->event);
+ if (!call->id)
+ return -ENODEV;
+ call->enabled = 0;
+ call->regfunc = probe_event_enable;
+ call->unregfunc = probe_event_disable;
+
+#ifdef CONFIG_EVENT_PROFILE
+ atomic_set(&call->profile_count, -1);
+ call->profile_enable = probe_profile_enable;
+ call->profile_disable = probe_profile_disable;
+#endif
+ call->data = tp;
+ ret = trace_add_event_call(call);
+ if (ret) {
+ pr_info("Failed to register kprobe event: %s\n", call->name);
+ unregister_ftrace_event(&tp->event);
+ }
+ return ret;
+}
+
+static void unregister_probe_event(struct trace_probe *tp)
+{
+ /* tp->event is unregistered in trace_remove_event_call() */
+ trace_remove_event_call(&tp->call);
+}
+
+/* Make a debugfs interface for controling probe points */
+static __init int init_kprobe_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ if (!d_tracer)
+ return 0;
+
+ entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
+ NULL, &kprobe_events_ops);
+
+ /* Event list interface */
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'kprobe_events' entry\n");
+
+ /* Profile interface */
+ entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
+ NULL, &kprobe_profile_ops);
+
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'kprobe_profile' entry\n");
+ return 0;
+}
+fs_initcall(init_kprobe_trace);
+
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+static int kprobe_trace_selftest_target(int a1, int a2, int a3,
+ int a4, int a5, int a6)
+{
+ return a1 + a2 + a3 + a4 + a5 + a6;
+}
+
+static __init int kprobe_trace_self_tests_init(void)
+{
+ int ret;
+ int (*target)(int, int, int, int, int, int);
+
+ target = kprobe_trace_selftest_target;
+
+ pr_info("Testing kprobe tracing: ");
+
+ ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
+ "a1 a2 a3 a4 a5 a6");
+ if (WARN_ON_ONCE(ret))
+ pr_warning("error enabling function entry\n");
+
+ ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
+ "ra rv");
+ if (WARN_ON_ONCE(ret))
+ pr_warning("error enabling function return\n");
+
+ ret = target(1, 2, 3, 4, 5, 6);
+
+ cleanup_all_probes();
+
+ pr_cont("OK\n");
+ return 0;
+}
+
+late_initcall(kprobe_trace_self_tests_init);
+
+#endif
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
- #include <linux/perf_counter.h>
+ #include <linux/perf_event.h>
#include <asm/syscall.h>
#include "trace_output.h"
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
-int reg_event_syscall_enter(void *ptr)
+int reg_event_syscall_enter(struct ftrace_event_call *call)
{
int ret = 0;
int num;
char *name;
- name = (char *)ptr;
+ name = (char *)call->data;
num = syscall_name_to_nr(name);
if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
return ret;
}
-void unreg_event_syscall_enter(void *ptr)
+void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
int num;
char *name;
- name = (char *)ptr;
+ name = (char *)call->data;
num = syscall_name_to_nr(name);
if (num < 0 || num >= NR_syscalls)
return;
mutex_unlock(&syscall_trace_lock);
}
-int reg_event_syscall_exit(void *ptr)
+int reg_event_syscall_exit(struct ftrace_event_call *call)
{
int ret = 0;
int num;
char *name;
- name = (char *)ptr;
+ name = call->data;
num = syscall_name_to_nr(name);
if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
return ret;
}
-void unreg_event_syscall_exit(void *ptr)
+void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
int num;
char *name;
- name = (char *)ptr;
+ name = call->data;
num = syscall_name_to_nr(name);
if (num < 0 || num >= NR_syscalls)
return;
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
- struct syscall_trace_enter *rec;
struct syscall_metadata *sys_data;
+ struct syscall_trace_enter *rec;
+ unsigned long flags;
+ char *raw_data;
int syscall_nr;
int size;
+ int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
size = ALIGN(size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- do {
- char raw_data[size];
+ if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+ "profile buffer not large enough"))
+ return;
+
+ /* Protect the per cpu buffer, begin the rcu read side */
+ local_irq_save(flags);
- /* zero the dead bytes from align to not leak stack to user */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ cpu = smp_processor_id();
+
+ if (in_nmi())
+ raw_data = rcu_dereference(trace_profile_buf_nmi);
+ else
+ raw_data = rcu_dereference(trace_profile_buf);
+
+ if (!raw_data)
+ goto end;
- rec = (struct syscall_trace_enter *) raw_data;
- tracing_generic_entry_update(&rec->ent, 0, 0);
- rec->ent.type = sys_data->enter_id;
- rec->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args,
- (unsigned long *)&rec->args);
- perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
- } while(0);
+ raw_data = per_cpu_ptr(raw_data, cpu);
+
+ /* zero the dead bytes from align to not leak stack to user */
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+ rec = (struct syscall_trace_enter *) raw_data;
+ tracing_generic_entry_update(&rec->ent, 0, 0);
+ rec->ent.type = sys_data->enter_id;
+ rec->nr = syscall_nr;
+ syscall_get_arguments(current, regs, 0, sys_data->nb_args,
+ (unsigned long *)&rec->args);
+ perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+
+ end:
+ local_irq_restore(flags);
}
int reg_prof_syscall_enter(char *name)
static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
- struct syscall_trace_exit rec;
+ struct syscall_trace_exit *rec;
+ unsigned long flags;
int syscall_nr;
+ char *raw_data;
+ int size;
+ int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
if (!sys_data)
return;
- tracing_generic_entry_update(&rec.ent, 0, 0);
- rec.ent.type = sys_data->exit_id;
- rec.nr = syscall_nr;
- rec.ret = syscall_get_return_value(current, regs);
+ /* We can probably do that at build time */
+ size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+ size -= sizeof(u32);
- perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+ /*
+ * Impossible, but be paranoid with the future
+ * How to put this check outside runtime?
+ */
+ if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+ "exit event has grown above profile buffer size"))
+ return;
+
+ /* Protect the per cpu buffer, begin the rcu read side */
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+
+ if (in_nmi())
+ raw_data = rcu_dereference(trace_profile_buf_nmi);
+ else
+ raw_data = rcu_dereference(trace_profile_buf);
+
+ if (!raw_data)
+ goto end;
+
+ raw_data = per_cpu_ptr(raw_data, cpu);
+
+ /* zero the dead bytes from align to not leak stack to user */
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+ rec = (struct syscall_trace_exit *)raw_data;
+
+ tracing_generic_entry_update(&rec->ent, 0, 0);
+ rec->ent.type = sys_data->exit_id;
+ rec->nr = syscall_nr;
+ rec->ret = syscall_get_return_value(current, regs);
+
+ perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+
+ end:
+ local_irq_restore(flags);
}
int reg_prof_syscall_exit(char *name)