git.karo-electronics.de Git - karo-tx-linux.git / commitdiff
Merge branch 'oprofile/ring_buffer' into oprofile/oprofile-for-tip
author Robert Richter <robert.richter@amd.com>
Thu, 8 Jan 2009 13:27:34 +0000 (14:27 +0100)
committer Robert Richter <robert.richter@amd.com>
Thu, 8 Jan 2009 13:27:34 +0000 (14:27 +0100)
13 files changed:
arch/Kconfig
arch/x86/oprofile/op_model_amd.c
drivers/oprofile/buffer_sync.c
drivers/oprofile/cpu_buffer.c
drivers/oprofile/cpu_buffer.h
drivers/oprofile/event_buffer.c
drivers/oprofile/oprof.c
drivers/oprofile/oprof.h
drivers/oprofile/oprofile_files.c
include/linux/oprofile.h
include/linux/ring_buffer.h
kernel/trace/ring_buffer.c
kernel/trace/trace.c

index 471e72dbaf8b123ce235b470303b56f2852209f0..2e13aa261929fd3a3689c41b0bfe6dcdf9805d5c 100644 (file)
@@ -6,6 +6,8 @@ config OPROFILE
        tristate "OProfile system profiling (EXPERIMENTAL)"
        depends on PROFILING
        depends on HAVE_OPROFILE
+       select TRACING
+       select RING_BUFFER
        help
          OProfile is a profiling system capable of profiling the
          whole system, include the kernel, kernel modules, libraries,
index 509513760a6e45c6ccade4b0054cf93f508add84..8fdf06e4edf93c4e93b37d71907bb274a0ebbfc3 100644 (file)
@@ -2,7 +2,7 @@
  * @file op_model_amd.c
  * athlon / K7 / K8 / Family 10h model-specific MSR operations
  *
- * @remark Copyright 2002-2008 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
  * @remark Read the file COPYING
  *
  * @author John Levon
@@ -10,7 +10,7 @@
  * @author Graydon Hoare
  * @author Robert Richter <robert.richter@amd.com>
  * @author Barry Kasindorf
-*/
+ */
 
 #include <linux/oprofile.h>
 #include <linux/device.h>
@@ -60,56 +60,10 @@ static unsigned long reset_value[NUM_COUNTERS];
 #define IBS_OP_LOW_VALID_BIT           (1ULL<<18)      /* bit 18 */
 #define IBS_OP_LOW_ENABLE              (1ULL<<17)      /* bit 17 */
 
-/* Codes used in cpu_buffer.c */
-/* This produces duplicate code, need to be fixed */
-#define IBS_FETCH_BEGIN 3
-#define IBS_OP_BEGIN    4
-
-/* The function interface needs to be fixed, something like add
-   data. Should then be added to linux/oprofile.h. */
-extern void
-oprofile_add_ibs_sample(struct pt_regs *const regs,
-                       unsigned int *const ibs_sample, int ibs_code);
-
-struct ibs_fetch_sample {
-       /* MSRC001_1031 IBS Fetch Linear Address Register */
-       unsigned int ibs_fetch_lin_addr_low;
-       unsigned int ibs_fetch_lin_addr_high;
-       /* MSRC001_1030 IBS Fetch Control Register */
-       unsigned int ibs_fetch_ctl_low;
-       unsigned int ibs_fetch_ctl_high;
-       /* MSRC001_1032 IBS Fetch Physical Address Register */
-       unsigned int ibs_fetch_phys_addr_low;
-       unsigned int ibs_fetch_phys_addr_high;
-};
-
-struct ibs_op_sample {
-       /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */
-       unsigned int ibs_op_rip_low;
-       unsigned int ibs_op_rip_high;
-       /* MSRC001_1035 IBS Op Data Register */
-       unsigned int ibs_op_data1_low;
-       unsigned int ibs_op_data1_high;
-       /* MSRC001_1036 IBS Op Data 2 Register */
-       unsigned int ibs_op_data2_low;
-       unsigned int ibs_op_data2_high;
-       /* MSRC001_1037 IBS Op Data 3 Register */
-       unsigned int ibs_op_data3_low;
-       unsigned int ibs_op_data3_high;
-       /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */
-       unsigned int ibs_dc_linear_low;
-       unsigned int ibs_dc_linear_high;
-       /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */
-       unsigned int ibs_dc_phys_low;
-       unsigned int ibs_dc_phys_high;
-};
-
-/*
- * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+
-*/
-static void clear_ibs_nmi(void);
+#define IBS_FETCH_SIZE 6
+#define IBS_OP_SIZE    12
 
-static int ibs_allowed;        /* AMD Family10h and later */
+static int has_ibs;    /* AMD Family10h and later */
 
 struct op_ibs_config {
        unsigned long op_enabled;
@@ -200,31 +154,29 @@ static inline int
 op_amd_handle_ibs(struct pt_regs * const regs,
                  struct op_msrs const * const msrs)
 {
-       unsigned int low, high;
-       struct ibs_fetch_sample ibs_fetch;
-       struct ibs_op_sample ibs_op;
+       u32 low, high;
+       u64 msr;
+       struct op_entry entry;
 
-       if (!ibs_allowed)
+       if (!has_ibs)
                return 1;
 
        if (ibs_config.fetch_enabled) {
                rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
                if (high & IBS_FETCH_HIGH_VALID_BIT) {
-                       ibs_fetch.ibs_fetch_ctl_high = high;
-                       ibs_fetch.ibs_fetch_ctl_low = low;
-                       rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high);
-                       ibs_fetch.ibs_fetch_lin_addr_high = high;
-                       ibs_fetch.ibs_fetch_lin_addr_low = low;
-                       rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high);
-                       ibs_fetch.ibs_fetch_phys_addr_high = high;
-                       ibs_fetch.ibs_fetch_phys_addr_low = low;
-
-                       oprofile_add_ibs_sample(regs,
-                                               (unsigned int *)&ibs_fetch,
-                                               IBS_FETCH_BEGIN);
-
-                       /*reenable the IRQ */
-                       rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+                       rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
+                       oprofile_write_reserve(&entry, regs, msr,
+                                              IBS_FETCH_CODE, IBS_FETCH_SIZE);
+                       oprofile_add_data(&entry, (u32)msr);
+                       oprofile_add_data(&entry, (u32)(msr >> 32));
+                       oprofile_add_data(&entry, low);
+                       oprofile_add_data(&entry, high);
+                       rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
+                       oprofile_add_data(&entry, (u32)msr);
+                       oprofile_add_data(&entry, (u32)(msr >> 32));
+                       oprofile_write_commit(&entry);
+
+                       /* reenable the IRQ */
                        high &= ~IBS_FETCH_HIGH_VALID_BIT;
                        high |= IBS_FETCH_HIGH_ENABLE;
                        low &= IBS_FETCH_LOW_MAX_CNT_MASK;
@@ -235,30 +187,29 @@ op_amd_handle_ibs(struct pt_regs * const regs,
        if (ibs_config.op_enabled) {
                rdmsr(MSR_AMD64_IBSOPCTL, low, high);
                if (low & IBS_OP_LOW_VALID_BIT) {
-                       rdmsr(MSR_AMD64_IBSOPRIP, low, high);
-                       ibs_op.ibs_op_rip_low = low;
-                       ibs_op.ibs_op_rip_high = high;
-                       rdmsr(MSR_AMD64_IBSOPDATA, low, high);
-                       ibs_op.ibs_op_data1_low = low;
-                       ibs_op.ibs_op_data1_high = high;
-                       rdmsr(MSR_AMD64_IBSOPDATA2, low, high);
-                       ibs_op.ibs_op_data2_low = low;
-                       ibs_op.ibs_op_data2_high = high;
-                       rdmsr(MSR_AMD64_IBSOPDATA3, low, high);
-                       ibs_op.ibs_op_data3_low = low;
-                       ibs_op.ibs_op_data3_high = high;
-                       rdmsr(MSR_AMD64_IBSDCLINAD, low, high);
-                       ibs_op.ibs_dc_linear_low = low;
-                       ibs_op.ibs_dc_linear_high = high;
-                       rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high);
-                       ibs_op.ibs_dc_phys_low = low;
-                       ibs_op.ibs_dc_phys_high = high;
+                       rdmsrl(MSR_AMD64_IBSOPRIP, msr);
+                       oprofile_write_reserve(&entry, regs, msr,
+                                              IBS_OP_CODE, IBS_OP_SIZE);
+                       oprofile_add_data(&entry, (u32)msr);
+                       oprofile_add_data(&entry, (u32)(msr >> 32));
+                       rdmsrl(MSR_AMD64_IBSOPDATA, msr);
+                       oprofile_add_data(&entry, (u32)msr);
+                       oprofile_add_data(&entry, (u32)(msr >> 32));
+                       rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
+                       oprofile_add_data(&entry, (u32)msr);
+                       oprofile_add_data(&entry, (u32)(msr >> 32));
+                       rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
+                       oprofile_add_data(&entry, (u32)msr);
+                       oprofile_add_data(&entry, (u32)(msr >> 32));
+                       rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
+                       oprofile_add_data(&entry, (u32)msr);
+                       oprofile_add_data(&entry, (u32)(msr >> 32));
+                       rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
+                       oprofile_add_data(&entry, (u32)msr);
+                       oprofile_add_data(&entry, (u32)(msr >> 32));
+                       oprofile_write_commit(&entry);
 
                        /* reenable the IRQ */
-                       oprofile_add_ibs_sample(regs,
-                                               (unsigned int *)&ibs_op,
-                                               IBS_OP_BEGIN);
-                       rdmsr(MSR_AMD64_IBSOPCTL, low, high);
                        high = 0;
                        low &= ~IBS_OP_LOW_VALID_BIT;
                        low |= IBS_OP_LOW_ENABLE;
@@ -308,14 +259,14 @@ static void op_amd_start(struct op_msrs const * const msrs)
        }
 
 #ifdef CONFIG_OPROFILE_IBS
-       if (ibs_allowed && ibs_config.fetch_enabled) {
+       if (has_ibs && ibs_config.fetch_enabled) {
                low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
                high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
                        + IBS_FETCH_HIGH_ENABLE;
                wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
        }
 
-       if (ibs_allowed && ibs_config.op_enabled) {
+       if (has_ibs && ibs_config.op_enabled) {
                low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
                        + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
                        + IBS_OP_LOW_ENABLE;
@@ -331,8 +282,10 @@ static void op_amd_stop(struct op_msrs const * const msrs)
        unsigned int low, high;
        int i;
 
-       /* Subtle: stop on all counters to avoid race with
-        * setting our pm callback */
+       /*
+        * Subtle: stop on all counters to avoid race with setting our
+        * pm callback
+        */
        for (i = 0 ; i < NUM_COUNTERS ; ++i) {
                if (!reset_value[i])
                        continue;
@@ -342,14 +295,16 @@ static void op_amd_stop(struct op_msrs const * const msrs)
        }
 
 #ifdef CONFIG_OPROFILE_IBS
-       if (ibs_allowed && ibs_config.fetch_enabled) {
-               low = 0;                /* clear max count and enable */
+       if (has_ibs && ibs_config.fetch_enabled) {
+               /* clear max count and enable */
+               low = 0;
                high = 0;
                wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
        }
 
-       if (ibs_allowed && ibs_config.op_enabled) {
-               low = 0;                /* clear max count and enable */
+       if (has_ibs && ibs_config.op_enabled) {
+               /* clear max count and enable */
+               low = 0;
                high = 0;
                wrmsr(MSR_AMD64_IBSOPCTL, low, high);
        }
@@ -370,18 +325,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
        }
 }
 
-#ifndef CONFIG_OPROFILE_IBS
-
-/* no IBS support */
-
-static int op_amd_init(struct oprofile_operations *ops)
-{
-       return 0;
-}
-
-static void op_amd_exit(void) {}
-
-#else
+#ifdef CONFIG_OPROFILE_IBS
 
 static u8 ibs_eilvt_off;
 
@@ -395,7 +339,7 @@ static inline void apic_clear_ibs_nmi_per_cpu(void *arg)
        setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
 }
 
-static int pfm_amd64_setup_eilvt(void)
+static int init_ibs_nmi(void)
 {
 #define IBSCTL_LVTOFFSETVAL            (1 << 8)
 #define IBSCTL                         0x1cc
@@ -419,6 +363,7 @@ static int pfm_amd64_setup_eilvt(void)
                                       | IBSCTL_LVTOFFSETVAL);
                pci_read_config_dword(cpu_cfg, IBSCTL, &value);
                if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
+                       pci_dev_put(cpu_cfg);
                        printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
                                "IBSCTL = 0x%08x", value);
                        return 1;
@@ -443,33 +388,35 @@ static int pfm_amd64_setup_eilvt(void)
        return 0;
 }
 
-/*
- * initialize the APIC for the IBS interrupts
- * if available (AMD Family10h rev B0 and later)
- */
-static void setup_ibs(void)
+/* uninitialize the APIC for the IBS interrupts if needed */
+static void clear_ibs_nmi(void)
 {
-       ibs_allowed = boot_cpu_has(X86_FEATURE_IBS);
+       if (has_ibs)
+               on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
+}
+
+/* initialize the APIC for the IBS interrupts if available */
+static void ibs_init(void)
+{
+       has_ibs = boot_cpu_has(X86_FEATURE_IBS);
 
-       if (!ibs_allowed)
+       if (!has_ibs)
                return;
 
-       if (pfm_amd64_setup_eilvt()) {
-               ibs_allowed = 0;
+       if (init_ibs_nmi()) {
+               has_ibs = 0;
                return;
        }
 
        printk(KERN_INFO "oprofile: AMD IBS detected\n");
 }
 
-
-/*
- * unitialize the APIC for the IBS interrupts if needed on AMD Family10h
- * rev B0 and later */
-static void clear_ibs_nmi(void)
+static void ibs_exit(void)
 {
-       if (ibs_allowed)
-               on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
+       if (!has_ibs)
+               return;
+
+       clear_ibs_nmi();
 }
 
 static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
@@ -486,7 +433,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
        if (ret)
                return ret;
 
-       if (!ibs_allowed)
+       if (!has_ibs)
                return ret;
 
        /* model specific files */
@@ -519,7 +466,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 
 static int op_amd_init(struct oprofile_operations *ops)
 {
-       setup_ibs();
+       ibs_init();
        create_arch_files = ops->create_files;
        ops->create_files = setup_ibs_files;
        return 0;
@@ -527,10 +474,21 @@ static int op_amd_init(struct oprofile_operations *ops)
 
 static void op_amd_exit(void)
 {
-       clear_ibs_nmi();
+       ibs_exit();
 }
 
-#endif
+#else
+
+/* no IBS support */
+
+static int op_amd_init(struct oprofile_operations *ops)
+{
+       return 0;
+}
+
+static void op_amd_exit(void) {}
+
+#endif /* CONFIG_OPROFILE_IBS */
 
 struct op_x86_model_spec const op_amd_spec = {
        .init                   = op_amd_init,
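
The new IBS_FETCH_SIZE (6) and IBS_OP_SIZE (12) constants count the 32-bit data words appended per sample: each 64-bit IBS MSR is split into a low and a high word via two oprofile_add_data() calls. A minimal sketch of that split; the helper name ibs_add_msr() is hypothetical and not part of the patch, which open-codes the two calls in op_amd_handle_ibs():

/* sketch only: ibs_add_msr() is a hypothetical helper */
static void ibs_add_msr(struct op_entry *entry, u64 msr)
{
	oprofile_add_data(entry, (u32)msr);		/* bits 31:0  */
	oprofile_add_data(entry, (u32)(msr >> 32));	/* bits 63:32 */
}

/* fetch sample: 3 registers x 2 words = IBS_FETCH_SIZE (6)  */
/* op sample:    6 registers x 2 words = IBS_OP_SIZE   (12)  */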
index b55cd23ffdefc81dfb80409a98c5697c21822b50..ac014cb27915006b07426f509191bedbda6469c8 100644 (file)
@@ -1,11 +1,12 @@
 /**
  * @file buffer_sync.c
  *
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
  * @author Barry Kasindorf
+ * @author Robert Richter <robert.richter@amd.com>
  *
  * This is the core of the buffer management. Each
  * CPU buffer is processed and entered into the
@@ -268,18 +269,6 @@ lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset)
        return cookie;
 }
 
-static void increment_tail(struct oprofile_cpu_buffer *b)
-{
-       unsigned long new_tail = b->tail_pos + 1;
-
-       rmb();  /* be sure fifo pointers are synchromized */
-
-       if (new_tail < b->buffer_size)
-               b->tail_pos = new_tail;
-       else
-               b->tail_pos = 0;
-}
-
 static unsigned long last_cookie = INVALID_COOKIE;
 
 static void add_cpu_switch(int i)
@@ -327,84 +316,73 @@ static void add_trace_begin(void)
        add_event_entry(TRACE_BEGIN_CODE);
 }
 
-#ifdef CONFIG_OPROFILE_IBS
-
-#define IBS_FETCH_CODE_SIZE    2
-#define IBS_OP_CODE_SIZE       5
-#define IBS_EIP(offset)                                \
-       (((struct op_sample *)&cpu_buf->buffer[(offset)])->eip)
-#define IBS_EVENT(offset)                              \
-       (((struct op_sample *)&cpu_buf->buffer[(offset)])->event)
-
-/*
- * Add IBS fetch and op entries to event buffer
- */
-static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
-                         struct mm_struct *mm)
+static void add_data(struct op_entry *entry, struct mm_struct *mm)
 {
-       unsigned long rip;
-       int i, count;
-       unsigned long ibs_cookie = 0;
+       unsigned long code, pc, val;
+       unsigned long cookie;
        off_t offset;
 
-       increment_tail(cpu_buf);        /* move to RIP entry */
-
-       rip = IBS_EIP(cpu_buf->tail_pos);
-
-#ifdef __LP64__
-       rip += IBS_EVENT(cpu_buf->tail_pos) << 32;
-#endif
+       if (!op_cpu_buffer_get_data(entry, &code))
+               return;
+       if (!op_cpu_buffer_get_data(entry, &pc))
+               return;
+       if (!op_cpu_buffer_get_size(entry))
+               return;
 
        if (mm) {
-               ibs_cookie = lookup_dcookie(mm, rip, &offset);
+               cookie = lookup_dcookie(mm, pc, &offset);
 
-               if (ibs_cookie == NO_COOKIE)
-                       offset = rip;
-               if (ibs_cookie == INVALID_COOKIE) {
+               if (cookie == NO_COOKIE)
+                       offset = pc;
+               if (cookie == INVALID_COOKIE) {
                        atomic_inc(&oprofile_stats.sample_lost_no_mapping);
-                       offset = rip;
+                       offset = pc;
                }
-               if (ibs_cookie != last_cookie) {
-                       add_cookie_switch(ibs_cookie);
-                       last_cookie = ibs_cookie;
+               if (cookie != last_cookie) {
+                       add_cookie_switch(cookie);
+                       last_cookie = cookie;
                }
        } else
-               offset = rip;
+               offset = pc;
 
        add_event_entry(ESCAPE_CODE);
        add_event_entry(code);
        add_event_entry(offset);        /* Offset from Dcookie */
 
-       /* we send the Dcookie offset, but send the raw Linear Add also*/
-       add_event_entry(IBS_EIP(cpu_buf->tail_pos));
-       add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
-
-       if (code == IBS_FETCH_CODE)
-               count = IBS_FETCH_CODE_SIZE;    /*IBS FETCH is 2 int64s*/
-       else
-               count = IBS_OP_CODE_SIZE;       /*IBS OP is 5 int64s*/
-
-       for (i = 0; i < count; i++) {
-               increment_tail(cpu_buf);
-               add_event_entry(IBS_EIP(cpu_buf->tail_pos));
-               add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
-       }
+       while (op_cpu_buffer_get_data(entry, &val))
+               add_event_entry(val);
 }
 
-#endif
-
-static void add_sample_entry(unsigned long offset, unsigned long event)
+static inline void add_sample_entry(unsigned long offset, unsigned long event)
 {
        add_event_entry(offset);
        add_event_entry(event);
 }
 
 
-static int add_us_sample(struct mm_struct *mm, struct op_sample *s)
+/*
+ * Add a sample to the global event buffer. If possible the
+ * sample is converted into a persistent dentry/offset pair
+ * for later lookup from userspace. Return 0 on failure.
+ */
+static int
+add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
 {
        unsigned long cookie;
        off_t offset;
 
+       if (in_kernel) {
+               add_sample_entry(s->eip, s->event);
+               return 1;
+       }
+
+       /* add userspace sample */
+
+       if (!mm) {
+               atomic_inc(&oprofile_stats.sample_lost_no_mm);
+               return 0;
+       }
+
        cookie = lookup_dcookie(mm, s->eip, &offset);
 
        if (cookie == INVALID_COOKIE) {
@@ -423,25 +401,6 @@ static int add_us_sample(struct mm_struct *mm, struct op_sample *s)
 }
 
 
-/* Add a sample to the global event buffer. If possible the
- * sample is converted into a persistent dentry/offset pair
- * for later lookup from userspace.
- */
-static int
-add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
-{
-       if (in_kernel) {
-               add_sample_entry(s->eip, s->event);
-               return 1;
-       } else if (mm) {
-               return add_us_sample(mm, s);
-       } else {
-               atomic_inc(&oprofile_stats.sample_lost_no_mm);
-       }
-       return 0;
-}
-
-
 static void release_mm(struct mm_struct *mm)
 {
        if (!mm)
@@ -466,33 +425,6 @@ static inline int is_code(unsigned long val)
 }
 
 
-/* "acquire" as many cpu buffer slots as we can */
-static unsigned long get_slots(struct oprofile_cpu_buffer *b)
-{
-       unsigned long head = b->head_pos;
-       unsigned long tail = b->tail_pos;
-
-       /*
-        * Subtle. This resets the persistent last_task
-        * and in_kernel values used for switching notes.
-        * BUT, there is a small window between reading
-        * head_pos, and this call, that means samples
-        * can appear at the new head position, but not
-        * be prefixed with the notes for switching
-        * kernel mode or a task switch. This small hole
-        * can lead to mis-attribution or samples where
-        * we don't know if it's in the kernel or not,
-        * at the start of an event buffer.
-        */
-       cpu_buffer_reset(b);
-
-       if (head >= tail)
-               return head - tail;
-
-       return head + (b->buffer_size - tail);
-}
-
-
 /* Move tasks along towards death. Any tasks on dead_tasks
  * will definitely have no remaining references in any
  * CPU buffers at this point, because we use two lists,
@@ -559,71 +491,72 @@ typedef enum {
  */
 void sync_buffer(int cpu)
 {
-       struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
        struct mm_struct *mm = NULL;
+       struct mm_struct *oldmm;
+       unsigned long val;
        struct task_struct *new;
        unsigned long cookie = 0;
        int in_kernel = 1;
        sync_buffer_state state = sb_buffer_start;
-#ifndef CONFIG_OPROFILE_IBS
        unsigned int i;
        unsigned long available;
-#endif
+       unsigned long flags;
+       struct op_entry entry;
+       struct op_sample *sample;
 
        mutex_lock(&buffer_mutex);
 
        add_cpu_switch(cpu);
 
-       /* Remember, only we can modify tail_pos */
-
-#ifndef CONFIG_OPROFILE_IBS
-       available = get_slots(cpu_buf);
+       op_cpu_buffer_reset(cpu);
+       available = op_cpu_buffer_entries(cpu);
 
        for (i = 0; i < available; ++i) {
-#else
-       while (get_slots(cpu_buf)) {
-#endif
-               struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos];
+               sample = op_cpu_buffer_read_entry(&entry, cpu);
+               if (!sample)
+                       break;
 
-               if (is_code(s->eip)) {
-                       if (s->event <= CPU_IS_KERNEL) {
+               if (is_code(sample->eip)) {
+                       flags = sample->event;
+                       if (flags & TRACE_BEGIN) {
+                               state = sb_bt_start;
+                               add_trace_begin();
+                       }
+                       if (flags & KERNEL_CTX_SWITCH) {
                                /* kernel/userspace switch */
-                               in_kernel = s->event;
+                               in_kernel = flags & IS_KERNEL;
                                if (state == sb_buffer_start)
                                        state = sb_sample_start;
-                               add_kernel_ctx_switch(s->event);
-                       } else if (s->event == CPU_TRACE_BEGIN) {
-                               state = sb_bt_start;
-                               add_trace_begin();
-#ifdef CONFIG_OPROFILE_IBS
-                       } else if (s->event == IBS_FETCH_BEGIN) {
-                               state = sb_bt_start;
-                               add_ibs_begin(cpu_buf, IBS_FETCH_CODE, mm);
-                       } else if (s->event == IBS_OP_BEGIN) {
-                               state = sb_bt_start;
-                               add_ibs_begin(cpu_buf, IBS_OP_CODE, mm);
-#endif
-                       } else {
-                               struct mm_struct *oldmm = mm;
-
+                               add_kernel_ctx_switch(flags & IS_KERNEL);
+                       }
+                       if (flags & USER_CTX_SWITCH
+                           && op_cpu_buffer_get_data(&entry, &val)) {
                                /* userspace context switch */
-                               new = (struct task_struct *)s->event;
-
+                               new = (struct task_struct *)val;
+                               oldmm = mm;
                                release_mm(oldmm);
                                mm = take_tasks_mm(new);
                                if (mm != oldmm)
                                        cookie = get_exec_dcookie(mm);
                                add_user_ctx_switch(new, cookie);
                        }
-               } else if (state >= sb_bt_start &&
-                          !add_sample(mm, s, in_kernel)) {
-                       if (state == sb_bt_start) {
-                               state = sb_bt_ignore;
-                               atomic_inc(&oprofile_stats.bt_lost_no_mapping);
-                       }
+                       if (op_cpu_buffer_get_size(&entry))
+                               add_data(&entry, mm);
+                       continue;
                }
 
-               increment_tail(cpu_buf);
+               if (state < sb_bt_start)
+                       /* ignore sample */
+                       continue;
+
+               if (add_sample(mm, sample, in_kernel))
+                       continue;
+
+               /* ignore backtraces if failed to add a sample */
+               if (state == sb_bt_start) {
+                       state = sb_bt_ignore;
+                       atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+               }
        }
        release_mm(mm);
 
index 01d38e78cde18bd341fca22940249e4a80068217..2e03b6d796d39aac794e6c8c52d5de979f82a7c7 100644 (file)
@@ -1,11 +1,12 @@
 /**
  * @file cpu_buffer.c
  *
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
  * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Robert Richter <robert.richter@amd.com>
  *
  * Each CPU has a local buffer that stores PC value/event
  * pairs. We also log context switches when we notice them.
 #include "buffer_sync.h"
 #include "oprof.h"
 
+#define OP_BUFFER_FLAGS        0
+
+/*
+ * Read and write access is using spin locking. Thus, writing to the
+ * buffer by NMI handler (x86) could occur also during critical
+ * sections when reading the buffer. To avoid this, there are 2
+ * buffers for independent read and write access. Read access is in
+ * process context only, write access only in the NMI handler. If the
+ * read buffer runs empty, both buffers are swapped atomically. There
+ * is potentially a small window during swapping where the buffers are
+ * disabled and samples could be lost.
+ *
+ * Using 2 buffers is a little bit overhead, but the solution is clear
+ * and does not require changes in the ring buffer implementation. It
+ * can be changed to a single buffer solution when the ring buffer
+ * access is implemented as non-locking atomic code.
+ */
+static struct ring_buffer *op_ring_buffer_read;
+static struct ring_buffer *op_ring_buffer_write;
 DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
 
 static void wq_sync_buffer(struct work_struct *work);
@@ -35,19 +55,9 @@ static void wq_sync_buffer(struct work_struct *work);
 #define DEFAULT_TIMER_EXPIRE (HZ / 10)
 static int work_enabled;
 
-void free_cpu_buffers(void)
-{
-       int i;
-
-       for_each_possible_cpu(i) {
-               vfree(per_cpu(cpu_buffer, i).buffer);
-               per_cpu(cpu_buffer, i).buffer = NULL;
-       }
-}
-
 unsigned long oprofile_get_cpu_buffer_size(void)
 {
-       return fs_cpu_buffer_size;
+       return oprofile_cpu_buffer_size;
 }
 
 void oprofile_cpu_buffer_inc_smpl_lost(void)
@@ -58,26 +68,36 @@ void oprofile_cpu_buffer_inc_smpl_lost(void)
        cpu_buf->sample_lost_overflow++;
 }
 
+void free_cpu_buffers(void)
+{
+       if (op_ring_buffer_read)
+               ring_buffer_free(op_ring_buffer_read);
+       op_ring_buffer_read = NULL;
+       if (op_ring_buffer_write)
+               ring_buffer_free(op_ring_buffer_write);
+       op_ring_buffer_write = NULL;
+}
+
 int alloc_cpu_buffers(void)
 {
        int i;
 
-       unsigned long buffer_size = fs_cpu_buffer_size;
+       unsigned long buffer_size = oprofile_cpu_buffer_size;
+
+       op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
+       if (!op_ring_buffer_read)
+               goto fail;
+       op_ring_buffer_write = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
+       if (!op_ring_buffer_write)
+               goto fail;
 
        for_each_possible_cpu(i) {
                struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);
 
-               b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
-                       cpu_to_node(i));
-               if (!b->buffer)
-                       goto fail;
-
                b->last_task = NULL;
                b->last_is_kernel = -1;
                b->tracing = 0;
                b->buffer_size = buffer_size;
-               b->tail_pos = 0;
-               b->head_pos = 0;
                b->sample_received = 0;
                b->sample_lost_overflow = 0;
                b->backtrace_aborted = 0;
@@ -124,73 +144,156 @@ void end_cpu_work(void)
        flush_scheduled_work();
 }
 
-/* Resets the cpu buffer to a sane state. */
-void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf)
+/*
+ * This function prepares the cpu buffer to write a sample.
+ *
+ * Struct op_entry is used during operations on the ring buffer while
+ * struct op_sample contains the data that is stored in the ring
+ * buffer. Struct entry can be uninitialized. The function reserves a
+ * data array that is specified by size. Use
+ * op_cpu_buffer_write_commit() after preparing the sample. In case of
+ * errors a null pointer is returned, otherwise the pointer to the
+ * sample.
+ *
+ */
+struct op_sample
+*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
 {
-       /* reset these to invalid values; the next sample
-        * collected will populate the buffer with proper
-        * values to initialize the buffer
-        */
-       cpu_buf->last_is_kernel = -1;
-       cpu_buf->last_task = NULL;
+       entry->event = ring_buffer_lock_reserve
+               (op_ring_buffer_write, sizeof(struct op_sample) +
+                size * sizeof(entry->sample->data[0]), &entry->irq_flags);
+       if (entry->event)
+               entry->sample = ring_buffer_event_data(entry->event);
+       else
+               entry->sample = NULL;
+
+       if (!entry->sample)
+               return NULL;
+
+       entry->size = size;
+       entry->data = entry->sample->data;
+
+       return entry->sample;
 }
 
-/* compute number of available slots in cpu_buffer queue */
-static unsigned long nr_available_slots(struct oprofile_cpu_buffer const *b)
+int op_cpu_buffer_write_commit(struct op_entry *entry)
 {
-       unsigned long head = b->head_pos;
-       unsigned long tail = b->tail_pos;
+       return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event,
+                                        entry->irq_flags);
+}
 
-       if (tail > head)
-               return (tail - head) - 1;
+struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
+{
+       struct ring_buffer_event *e;
+       e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
+       if (e)
+               goto event;
+       if (ring_buffer_swap_cpu(op_ring_buffer_read,
+                                op_ring_buffer_write,
+                                cpu))
+               return NULL;
+       e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
+       if (e)
+               goto event;
+       return NULL;
+
+event:
+       entry->event = e;
+       entry->sample = ring_buffer_event_data(e);
+       entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
+               / sizeof(entry->sample->data[0]);
+       entry->data = entry->sample->data;
+       return entry->sample;
+}
 
-       return tail + (b->buffer_size - head) - 1;
+unsigned long op_cpu_buffer_entries(int cpu)
+{
+       return ring_buffer_entries_cpu(op_ring_buffer_read, cpu)
+               + ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
 }
 
-static void increment_head(struct oprofile_cpu_buffer *b)
+static int
+op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
+           int is_kernel, struct task_struct *task)
 {
-       unsigned long new_head = b->head_pos + 1;
+       struct op_entry entry;
+       struct op_sample *sample;
+       unsigned long flags;
+       int size;
+
+       flags = 0;
 
-       /* Ensure anything written to the slot before we
-        * increment is visible */
-       wmb();
+       if (backtrace)
+               flags |= TRACE_BEGIN;
+
+       /* notice a switch from user->kernel or vice versa */
+       is_kernel = !!is_kernel;
+       if (cpu_buf->last_is_kernel != is_kernel) {
+               cpu_buf->last_is_kernel = is_kernel;
+               flags |= KERNEL_CTX_SWITCH;
+               if (is_kernel)
+                       flags |= IS_KERNEL;
+       }
+
+       /* notice a task switch */
+       if (cpu_buf->last_task != task) {
+               cpu_buf->last_task = task;
+               flags |= USER_CTX_SWITCH;
+       }
+
+       if (!flags)
+               /* nothing to do */
+               return 0;
 
-       if (new_head < b->buffer_size)
-               b->head_pos = new_head;
+       if (flags & USER_CTX_SWITCH)
+               size = 1;
        else
-               b->head_pos = 0;
-}
+               size = 0;
 
-static inline void
-add_sample(struct oprofile_cpu_buffer *cpu_buf,
-          unsigned long pc, unsigned long event)
-{
-       struct op_sample *entry = &cpu_buf->buffer[cpu_buf->head_pos];
-       entry->eip = pc;
-       entry->event = event;
-       increment_head(cpu_buf);
+       sample = op_cpu_buffer_write_reserve(&entry, size);
+       if (!sample)
+               return -ENOMEM;
+
+       sample->eip = ESCAPE_CODE;
+       sample->event = flags;
+
+       if (size)
+               op_cpu_buffer_add_data(&entry, (unsigned long)task);
+
+       op_cpu_buffer_write_commit(&entry);
+
+       return 0;
 }
 
-static inline void
-add_code(struct oprofile_cpu_buffer *buffer, unsigned long value)
+static inline int
+op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
+             unsigned long pc, unsigned long event)
 {
-       add_sample(buffer, ESCAPE_CODE, value);
+       struct op_entry entry;
+       struct op_sample *sample;
+
+       sample = op_cpu_buffer_write_reserve(&entry, 0);
+       if (!sample)
+               return -ENOMEM;
+
+       sample->eip = pc;
+       sample->event = event;
+
+       return op_cpu_buffer_write_commit(&entry);
 }
 
-/* This must be safe from any context. It's safe writing here
- * because of the head/tail separation of the writer and reader
- * of the CPU buffer.
+/*
+ * This must be safe from any context.
  *
  * is_kernel is needed because on some architectures you cannot
  * tell if you are in kernel or user space simply by looking at
  * pc. We tag this in the buffer by generating kernel enter/exit
  * events whenever is_kernel changes
  */
-static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
-                     int is_kernel, unsigned long event)
+static int
+log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
+          unsigned long backtrace, int is_kernel, unsigned long event)
 {
-       struct task_struct *task;
-
        cpu_buf->sample_received++;
 
        if (pc == ESCAPE_CODE) {
@@ -198,131 +301,115 @@ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
                return 0;
        }
 
-       if (nr_available_slots(cpu_buf) < 3) {
-               cpu_buf->sample_lost_overflow++;
-               return 0;
-       }
-
-       is_kernel = !!is_kernel;
+       if (op_add_code(cpu_buf, backtrace, is_kernel, current))
+               goto fail;
 
-       task = current;
+       if (op_add_sample(cpu_buf, pc, event))
+               goto fail;
 
-       /* notice a switch from user->kernel or vice versa */
-       if (cpu_buf->last_is_kernel != is_kernel) {
-               cpu_buf->last_is_kernel = is_kernel;
-               add_code(cpu_buf, is_kernel);
-       }
-
-       /* notice a task switch */
-       if (cpu_buf->last_task != task) {
-               cpu_buf->last_task = task;
-               add_code(cpu_buf, (unsigned long)task);
-       }
-
-       add_sample(cpu_buf, pc, event);
        return 1;
+
+fail:
+       cpu_buf->sample_lost_overflow++;
+       return 0;
 }
 
-static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
+static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
 {
-       if (nr_available_slots(cpu_buf) < 4) {
-               cpu_buf->sample_lost_overflow++;
-               return 0;
-       }
-
-       add_code(cpu_buf, CPU_TRACE_BEGIN);
        cpu_buf->tracing = 1;
-       return 1;
 }
 
-static void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
+static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
 {
        cpu_buf->tracing = 0;
 }
 
-void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
-                               unsigned long event, int is_kernel)
+static inline void
+__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
+                         unsigned long event, int is_kernel)
 {
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+       unsigned long backtrace = oprofile_backtrace_depth;
 
-       if (!backtrace_depth) {
-               log_sample(cpu_buf, pc, is_kernel, event);
+       /*
+        * if log_sample() fail we can't backtrace since we lost the
+        * source of this event
+        */
+       if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event))
+               /* failed */
                return;
-       }
 
-       if (!oprofile_begin_trace(cpu_buf))
+       if (!backtrace)
                return;
 
-       /* if log_sample() fail we can't backtrace since we lost the source
-        * of this event */
-       if (log_sample(cpu_buf, pc, is_kernel, event))
-               oprofile_ops.backtrace(regs, backtrace_depth);
+       oprofile_begin_trace(cpu_buf);
+       oprofile_ops.backtrace(regs, backtrace);
        oprofile_end_trace(cpu_buf);
 }
 
+void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
+                            unsigned long event, int is_kernel)
+{
+       __oprofile_add_ext_sample(pc, regs, event, is_kernel);
+}
+
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
 {
        int is_kernel = !user_mode(regs);
        unsigned long pc = profile_pc(regs);
 
-       oprofile_add_ext_sample(pc, regs, event, is_kernel);
+       __oprofile_add_ext_sample(pc, regs, event, is_kernel);
 }
 
-#ifdef CONFIG_OPROFILE_IBS
-
-#define MAX_IBS_SAMPLE_SIZE 14
-
-void oprofile_add_ibs_sample(struct pt_regs *const regs,
-                            unsigned int *const ibs_sample, int ibs_code)
+/*
+ * Add samples with data to the ring buffer.
+ *
+ * Use oprofile_add_data(&entry, val) to add data and
+ * oprofile_write_commit(&entry) to commit the sample.
+ */
+void
+oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
+                      unsigned long pc, int code, int size)
 {
+       struct op_sample *sample;
        int is_kernel = !user_mode(regs);
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
-       struct task_struct *task;
 
        cpu_buf->sample_received++;
 
-       if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) {
-               /* we can't backtrace since we lost the source of this event */
-               cpu_buf->sample_lost_overflow++;
-               return;
-       }
+       /* no backtraces for samples with data */
+       if (op_add_code(cpu_buf, 0, is_kernel, current))
+               goto fail;
 
-       /* notice a switch from user->kernel or vice versa */
-       if (cpu_buf->last_is_kernel != is_kernel) {
-               cpu_buf->last_is_kernel = is_kernel;
-               add_code(cpu_buf, is_kernel);
-       }
+       sample = op_cpu_buffer_write_reserve(entry, size + 2);
+       if (!sample)
+               goto fail;
+       sample->eip = ESCAPE_CODE;
+       sample->event = 0;              /* no flags */
 
-       /* notice a task switch */
-       if (!is_kernel) {
-               task = current;
-               if (cpu_buf->last_task != task) {
-                       cpu_buf->last_task = task;
-                       add_code(cpu_buf, (unsigned long)task);
-               }
-       }
+       op_cpu_buffer_add_data(entry, code);
+       op_cpu_buffer_add_data(entry, pc);
 
-       add_code(cpu_buf, ibs_code);
-       add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]);
-       add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]);
-       add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]);
+       return;
 
-       if (ibs_code == IBS_OP_BEGIN) {
-               add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]);
-               add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]);
-               add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]);
-       }
+fail:
+       cpu_buf->sample_lost_overflow++;
+}
 
-       if (backtrace_depth)
-               oprofile_ops.backtrace(regs, backtrace_depth);
+int oprofile_add_data(struct op_entry *entry, unsigned long val)
+{
+       return op_cpu_buffer_add_data(entry, val);
 }
 
-#endif
+int oprofile_write_commit(struct op_entry *entry)
+{
+       return op_cpu_buffer_write_commit(entry);
+}
 
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
 {
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
-       log_sample(cpu_buf, pc, is_kernel, event);
+       log_sample(cpu_buf, pc, 0, is_kernel, event);
 }
 
 void oprofile_add_trace(unsigned long pc)
@@ -332,21 +419,21 @@ void oprofile_add_trace(unsigned long pc)
        if (!cpu_buf->tracing)
                return;
 
-       if (nr_available_slots(cpu_buf) < 1) {
-               cpu_buf->tracing = 0;
-               cpu_buf->sample_lost_overflow++;
-               return;
-       }
+       /*
+        * broken frame can give an eip with the same value as an
+        * escape code, abort the trace if we get it
+        */
+       if (pc == ESCAPE_CODE)
+               goto fail;
 
-       /* broken frame can give an eip with the same value as an escape code,
-        * abort the trace if we get it */
-       if (pc == ESCAPE_CODE) {
-               cpu_buf->tracing = 0;
-               cpu_buf->backtrace_aborted++;
-               return;
-       }
+       if (op_add_sample(cpu_buf, pc, 0))
+               goto fail;
 
-       add_sample(cpu_buf, pc, 0);
+       return;
+fail:
+       cpu_buf->tracing = 0;
+       cpu_buf->backtrace_aborted++;
+       return;
 }
 
 /*
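
Taken together, the split into op_ring_buffer_write (filled from NMI context) and op_ring_buffer_read (drained in process context, with a swap when the read side runs empty) gives the lock-avoiding scheme described in the comment above. A condensed sketch of both sides, using only functions introduced by this patch; write_one(), read_all() and process() are illustrative names, not part of the kernel:

/* writer, NMI context: one sample plus n extra data words */
static int write_one(unsigned long pc, unsigned long event,
		     unsigned long *extra, unsigned long n)
{
	struct op_entry entry;
	struct op_sample *sample;
	unsigned long i;

	sample = op_cpu_buffer_write_reserve(&entry, n);
	if (!sample)
		return -ENOMEM;
	sample->eip = pc;
	sample->event = event;
	for (i = 0; i < n; i++)
		op_cpu_buffer_add_data(&entry, extra[i]);
	return op_cpu_buffer_write_commit(&entry);
}

/* reader, process context: drain this cpu's entries */
static void read_all(int cpu)
{
	struct op_entry entry;
	struct op_sample *sample;
	unsigned long i, val;
	unsigned long available = op_cpu_buffer_entries(cpu);

	for (i = 0; i < available; i++) {
		sample = op_cpu_buffer_read_entry(&entry, cpu);
		if (!sample)
			break;
		/* sample->eip / sample->event, then optional payload */
		while (op_cpu_buffer_get_data(&entry, &val))
			process(val);		/* placeholder consumer */
	}
}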
index d3cc26264db55b60948a71b9f8500a236666a64d..63f81c44846ae28b5bf2020d8fe445e4e4377d7b 100644 (file)
@@ -1,10 +1,11 @@
 /**
  * @file cpu_buffer.h
  *
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ * @author Robert Richter <robert.richter@amd.com>
  */
 
 #ifndef OPROFILE_CPU_BUFFER_H
@@ -15,6 +16,7 @@
 #include <linux/workqueue.h>
 #include <linux/cache.h>
 #include <linux/sched.h>
+#include <linux/ring_buffer.h>
 
 struct task_struct;
 
@@ -30,16 +32,16 @@ void end_cpu_work(void);
 struct op_sample {
        unsigned long eip;
        unsigned long event;
+       unsigned long data[0];
 };
 
+struct op_entry;
+
 struct oprofile_cpu_buffer {
-       volatile unsigned long head_pos;
-       volatile unsigned long tail_pos;
        unsigned long buffer_size;
        struct task_struct *last_task;
        int last_is_kernel;
        int tracing;
-       struct op_sample *buffer;
        unsigned long sample_received;
        unsigned long sample_lost_overflow;
        unsigned long backtrace_aborted;
@@ -50,12 +52,62 @@ struct oprofile_cpu_buffer {
 
 DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
 
-void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf);
+/*
+ * Resets the cpu buffer to a sane state.
+ *
+ * reset these to invalid values; the next sample collected will
+ * populate the buffer with proper values to initialize the buffer
+ */
+static inline void op_cpu_buffer_reset(int cpu)
+{
+       struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
+
+       cpu_buf->last_is_kernel = -1;
+       cpu_buf->last_task = NULL;
+}
+
+struct op_sample
+*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size);
+int op_cpu_buffer_write_commit(struct op_entry *entry);
+struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu);
+unsigned long op_cpu_buffer_entries(int cpu);
+
+/* returns the remaining free size of data in the entry */
+static inline
+int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
+{
+       if (!entry->size)
+               return 0;
+       *entry->data = val;
+       entry->size--;
+       entry->data++;
+       return entry->size;
+}
+
+/* returns the size of data in the entry */
+static inline
+int op_cpu_buffer_get_size(struct op_entry *entry)
+{
+       return entry->size;
+}
+
+/* returns 0 if empty or the size of data including the current value */
+static inline
+int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
+{
+       int size = entry->size;
+       if (!size)
+               return 0;
+       *val = *entry->data;
+       entry->size--;
+       entry->data++;
+       return size;
+}
 
-/* transient events for the CPU buffer -> event buffer */
-#define CPU_IS_KERNEL 1
-#define CPU_TRACE_BEGIN 2
-#define IBS_FETCH_BEGIN 3
-#define IBS_OP_BEGIN    4
+/* extra data flags */
+#define KERNEL_CTX_SWITCH      (1UL << 0)
+#define IS_KERNEL              (1UL << 1)
+#define TRACE_BEGIN            (1UL << 2)
+#define USER_CTX_SWITCH                (1UL << 3)
 
 #endif /* OPROFILE_CPU_BUFFER_H */
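
With head_pos/tail_pos gone, a CPU-buffer slot is now either a plain sample or an escape entry whose flags replace the old CPU_IS_KERNEL/CPU_TRACE_BEGIN codes. A comment sketch of the two layouts, as implied by op_add_code() and op_add_sample() in cpu_buffer.c:

/*
 * sketch of the two sample layouts after this patch:
 *
 *   plain sample:   eip = pc,          event = sampled event
 *   escape sample:  eip = ESCAPE_CODE, event = flag bits above,
 *                   data[0] = task pointer iff USER_CTX_SWITCH set
 */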
index 191a3202cecc072b8effc0ea5f0913ebde2ddc57..2b7ae366ceb151f1c197fd116b251d55fdeb3b74 100644 (file)
@@ -73,8 +73,8 @@ int alloc_event_buffer(void)
        unsigned long flags;
 
        spin_lock_irqsave(&oprofilefs_lock, flags);
-       buffer_size = fs_buffer_size;
-       buffer_watershed = fs_buffer_watershed;
+       buffer_size = oprofile_buffer_size;
+       buffer_watershed = oprofile_buffer_watershed;
        spin_unlock_irqrestore(&oprofilefs_lock, flags);
 
        if (buffer_watershed >= buffer_size)
index cd375907f26fcd3cc452f021fa893e51870941f9..3cffce90f82a9693999bb16c5741b6c2e4e842c2 100644 (file)
@@ -23,7 +23,7 @@
 struct oprofile_operations oprofile_ops;
 
 unsigned long oprofile_started;
-unsigned long backtrace_depth;
+unsigned long oprofile_backtrace_depth;
 static unsigned long is_setup;
 static DEFINE_MUTEX(start_mutex);
 
@@ -172,7 +172,7 @@ int oprofile_set_backtrace(unsigned long val)
                goto out;
        }
 
-       backtrace_depth = val;
+       oprofile_backtrace_depth = val;
 
 out:
        mutex_unlock(&start_mutex);
index 5df0c21a608ffbf48fa05b516fc9ff057509d148..c288d3c24b505ee5778eebe7f7b0f5fb3d82c23a 100644 (file)
@@ -21,12 +21,12 @@ void oprofile_stop(void);
 
 struct oprofile_operations;
 
-extern unsigned long fs_buffer_size;
-extern unsigned long fs_cpu_buffer_size;
-extern unsigned long fs_buffer_watershed;
+extern unsigned long oprofile_buffer_size;
+extern unsigned long oprofile_cpu_buffer_size;
+extern unsigned long oprofile_buffer_watershed;
 extern struct oprofile_operations oprofile_ops;
 extern unsigned long oprofile_started;
-extern unsigned long backtrace_depth;
+extern unsigned long oprofile_backtrace_depth;
 
 struct super_block;
 struct dentry;
index cc106d503acec3cdb2917ba0ea316e267474f724..5d36ffc30dd5cc82e5e8806aea88c4863a2cf219 100644 (file)
 #include "oprofile_stats.h"
 #include "oprof.h"
 
-unsigned long fs_buffer_size = 131072;
-unsigned long fs_cpu_buffer_size = 8192;
-unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
+#define BUFFER_SIZE_DEFAULT            131072
+#define CPU_BUFFER_SIZE_DEFAULT                8192
+#define BUFFER_WATERSHED_DEFAULT       32768   /* FIXME: tune */
+
+unsigned long oprofile_buffer_size;
+unsigned long oprofile_cpu_buffer_size;
+unsigned long oprofile_buffer_watershed;
 
 static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
 {
-       return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset);
+       return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count,
+                                       offset);
 }
 
 
@@ -120,12 +125,17 @@ static const struct file_operations dump_fops = {
 
 void oprofile_create_files(struct super_block *sb, struct dentry *root)
 {
+       /* reinitialize default values */
+       oprofile_buffer_size =          BUFFER_SIZE_DEFAULT;
+       oprofile_cpu_buffer_size =      CPU_BUFFER_SIZE_DEFAULT;
+       oprofile_buffer_watershed =     BUFFER_WATERSHED_DEFAULT;
+
        oprofilefs_create_file(sb, root, "enable", &enable_fops);
        oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
        oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
-       oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
-       oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
-       oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size);
+       oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size);
+       oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed);
+       oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &oprofile_cpu_buffer_size);
        oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops);
        oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops);
        oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops);
index 5231861f357de5e3b2015ac4d8ca1d0badfefa70..1d9518bc4c58f25426898d5e7f12c9a3766663c0 100644 (file)
@@ -86,8 +86,7 @@ int oprofile_arch_init(struct oprofile_operations * ops);
 void oprofile_arch_exit(void);
 
 /**
- * Add a sample. This may be called from any context. Pass
- * smp_processor_id() as cpu.
+ * Add a sample. This may be called from any context.
  */
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
 
@@ -165,4 +164,22 @@ void oprofile_put_buff(unsigned long *buf, unsigned int start,
 unsigned long oprofile_get_cpu_buffer_size(void);
 void oprofile_cpu_buffer_inc_smpl_lost(void);
  
+/* cpu buffer functions */
+
+struct op_sample;
+
+struct op_entry {
+       struct ring_buffer_event *event;
+       struct op_sample *sample;
+       unsigned long irq_flags;
+       unsigned long size;
+       unsigned long *data;
+};
+
+void oprofile_write_reserve(struct op_entry *entry,
+                           struct pt_regs * const regs,
+                           unsigned long pc, int code, int size);
+int oprofile_add_data(struct op_entry *entry, unsigned long val);
+int oprofile_write_commit(struct op_entry *entry);
+
 #endif /* OPROFILE_H */
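
oprofile_write_reserve(), oprofile_add_data() and oprofile_write_commit() are the driver-facing wrappers around the CPU-buffer helpers and are meant to be called back to back from an interrupt handler, as in the AMD IBS code above. A minimal sketch; my_driver_log_sample(), MY_SAMPLE_CODE and both data words are placeholders, not values this patch defines:

static void my_driver_log_sample(struct pt_regs * const regs,
				 unsigned long pc,
				 unsigned long word0, unsigned long word1)
{
	struct op_entry entry;

	/* reserve room for two data words behind code and pc */
	oprofile_write_reserve(&entry, regs, pc, MY_SAMPLE_CODE, 2);
	oprofile_add_data(&entry, word0);
	oprofile_add_data(&entry, word1);
	oprofile_write_commit(&entry);
}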
index e097c2e6b6dcaa212f26bf6b08b66e4c9021cb21..de9d8c12e5ec836075a76d8947f1b1e61d074ebc 100644 (file)
@@ -116,6 +116,8 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
 unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(int cpu);
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
index 668bbb5ef2bd154dc8084fe4480bfd251e484f41..d42b882dfe4ba87131beb591aba51179fe4bb209 100644 (file)
@@ -31,6 +31,7 @@ void tracing_on(void)
 {
        ring_buffers_off = 0;
 }
+EXPORT_SYMBOL_GPL(tracing_on);
 
 /**
  * tracing_off - turn off all tracing buffers
@@ -44,6 +45,7 @@ void tracing_off(void)
 {
        ring_buffers_off = 1;
 }
+EXPORT_SYMBOL_GPL(tracing_off);
 
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
@@ -60,12 +62,14 @@ u64 ring_buffer_time_stamp(int cpu)
 
        return time;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
 
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
 {
        /* Just stupid testing the normalize function and deltas */
        *ts >>= DEBUG_SHIFT;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
 
 #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
 #define RB_ALIGNMENT_SHIFT     2
@@ -113,8 +117,15 @@ rb_event_length(struct ring_buffer_event *event)
  */
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
-       return rb_event_length(event);
+       unsigned length = rb_event_length(event);
+       if (event->type != RINGBUF_TYPE_DATA)
+               return length;
+       length -= RB_EVNT_HDR_SIZE;
+       if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
+                length -= sizeof(event->array[0]);
+       return length;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 
 /* inline for ring buffer fast paths */
 static inline void *
@@ -136,6 +147,7 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
 {
        return rb_event_data(event);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 
 #define for_each_buffer_cpu(buffer, cpu)               \
        for_each_cpu_mask(cpu, buffer->cpumask)
@@ -381,7 +393,7 @@ extern int ring_buffer_page_too_big(void);
 
 /**
  * ring_buffer_alloc - allocate a new ring_buffer
- * @size: the size in bytes that is needed.
+ * @size: the size in bytes per cpu that is needed.
  * @flags: attributes to set for the ring buffer.
  *
  * Currently the only flag that is available is the RB_FL_OVERWRITE
@@ -444,6 +456,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
        kfree(buffer);
        return NULL;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
@@ -459,6 +472,7 @@ ring_buffer_free(struct ring_buffer *buffer)
 
        kfree(buffer);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_free);
 
 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
 
@@ -620,6 +634,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
        mutex_unlock(&buffer->mutex);
        return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_resize);
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
@@ -1220,6 +1235,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
                preempt_enable_notrace();
        return NULL;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
                      struct ring_buffer_event *event)
@@ -1269,6 +1285,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
 /**
  * ring_buffer_write - write data to the buffer without reserving
@@ -1334,6 +1351,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_write);
 
 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
@@ -1360,6 +1378,7 @@ void ring_buffer_record_disable(struct ring_buffer *buffer)
 {
        atomic_inc(&buffer->record_disabled);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
 
 /**
  * ring_buffer_record_enable - enable writes to the buffer
@@ -1372,6 +1391,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer)
 {
        atomic_dec(&buffer->record_disabled);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
 
 /**
  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
@@ -1393,6 +1413,7 @@ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
        cpu_buffer = buffer->buffers[cpu];
        atomic_inc(&cpu_buffer->record_disabled);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
 
 /**
  * ring_buffer_record_enable_cpu - enable writes to the buffer
@@ -1412,6 +1433,7 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
        cpu_buffer = buffer->buffers[cpu];
        atomic_dec(&cpu_buffer->record_disabled);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
 
 /**
  * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
@@ -1428,6 +1450,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
        cpu_buffer = buffer->buffers[cpu];
        return cpu_buffer->entries;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
 
 /**
  * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
@@ -1444,6 +1467,7 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
        cpu_buffer = buffer->buffers[cpu];
        return cpu_buffer->overrun;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
  * ring_buffer_entries - get the number of entries in a buffer
@@ -1466,6 +1490,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
 
        return entries;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_entries);
 
 /**
  * ring_buffer_overrun_cpu - get the number of overruns in buffer
@@ -1488,6 +1513,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
 
        return overruns;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_overruns);
 
 /**
  * ring_buffer_iter_reset - reset an iterator
@@ -1513,6 +1539,7 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
        else
                iter->read_stamp = iter->head_page->time_stamp;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
 
 /**
  * ring_buffer_iter_empty - check if an iterator has no more to read
@@ -1527,6 +1554,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
        return iter->head_page == cpu_buffer->commit_page &&
                iter->head == rb_commit_index(cpu_buffer);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
 
 static void
 rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1797,6 +1825,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 
        return NULL;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_peek);
 
 /**
  * ring_buffer_iter_peek - peek at the next event to be read
@@ -1867,6 +1896,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
        return NULL;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
 
 /**
  * ring_buffer_consume - return an event and consume it
@@ -1894,6 +1924,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 
        return event;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_consume);
 
 /**
  * ring_buffer_read_start - start a non consuming read of the buffer
@@ -1934,6 +1965,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 
        return iter;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
 /**
  * ring_buffer_finish - finish reading the iterator of the buffer
@@ -1950,6 +1982,7 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
        atomic_dec(&cpu_buffer->record_disabled);
        kfree(iter);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
 
 /**
  * ring_buffer_read - read the next item in the ring buffer by the iterator
@@ -1971,6 +2004,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
 
        return event;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read);
 
 /**
  * ring_buffer_size - return the size of the ring buffer (in bytes)
@@ -1980,6 +2014,7 @@ unsigned long ring_buffer_size(struct ring_buffer *buffer)
 {
        return BUF_PAGE_SIZE * buffer->pages;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_size);
 
 static void
 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
@@ -2022,6 +2057,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 
        spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
 /**
  * ring_buffer_reset - reset a ring buffer
@@ -2034,6 +2070,7 @@ void ring_buffer_reset(struct ring_buffer *buffer)
        for_each_buffer_cpu(buffer, cpu)
                ring_buffer_reset_cpu(buffer, cpu);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_reset);
 
 /**
  * rind_buffer_empty - is the ring buffer empty?
@@ -2052,6 +2089,7 @@ int ring_buffer_empty(struct ring_buffer *buffer)
        }
        return 1;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_empty);
 
 /**
  * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
@@ -2068,6 +2106,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
        cpu_buffer = buffer->buffers[cpu];
        return rb_per_cpu_empty(cpu_buffer);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
 
 /**
  * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
@@ -2117,6 +2156,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
index d86e3252f3000024cfaf31c63ffbee765dafac53..a96b335fe75ca80bf4fcbdc75d5575a5c19a88d7 100644 (file)
@@ -914,7 +914,7 @@ enum trace_file_type {
        TRACE_FILE_LAT_FMT      = 1,
 };
 
-static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+static void trace_iterator_increment(struct trace_iterator *iter)
 {
        /* Don't allow ftrace to trace into the ring buffers */
        ftrace_disable_cpu();
@@ -993,7 +993,7 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
        iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
 
        if (iter->ent)
-               trace_iterator_increment(iter, iter->cpu);
+               trace_iterator_increment(iter);
 
        return iter->ent ? iter : NULL;
 }