diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index b67a6b5aa8d449ee06e0c22b586b6c51af4d2170..c3b8e24f2b16f4f6441c286268a61ac6a320b7c3 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS 4
+#define NUM_COUNTERS           4
+#define NUM_COUNTERS_F15H      6
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-#define NUM_VIRT_COUNTERS 32
+#define NUM_VIRT_COUNTERS      32
 #else
-#define NUM_VIRT_COUNTERS NUM_COUNTERS
+#define NUM_VIRT_COUNTERS      0
 #endif
 
 #define OP_EVENT_MASK                  0x0FFF
 
 #define MSR_AMD_EVENTSEL_RESERVED      ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
 
-static unsigned long reset_value[NUM_VIRT_COUNTERS];
+static int num_counters;
+static unsigned long reset_value[OP_MAX_COUNTER];
 
 #define IBS_FETCH_SIZE                 6
 #define IBS_OP_SIZE                    12
 
 static u32 ibs_caps;
 
-struct op_ibs_config {
+struct ibs_config {
        unsigned long op_enabled;
        unsigned long fetch_enabled;
        unsigned long max_cnt_fetch;
        unsigned long max_cnt_op;
        unsigned long rand_en;
        unsigned long dispatched_ops;
+       unsigned long branch_target;
 };
 
-static struct op_ibs_config ibs_config;
-static u64 ibs_op_ctl;
+struct ibs_state {
+       u64             ibs_op_ctl;
+       int             branch_target;
+       unsigned long   sample_size;
+};
+
+static struct ibs_config ibs_config;
+static struct ibs_state ibs_state;
 
 /*
  * IBS cpuid feature detection
  */
 
-#define IBS_CPUID_FEATURES      0x8000001b
+#define IBS_CPUID_FEATURES             0x8000001b
 
 /*
  * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
  * bit 0 is used to indicate the existence of IBS.
  */
-#define IBS_CAPS_AVAIL                 (1LL<<0)
-#define IBS_CAPS_RDWROPCNT             (1LL<<3)
-#define IBS_CAPS_OPCNT                 (1LL<<4)
+#define IBS_CAPS_AVAIL                 (1U<<0)
+#define IBS_CAPS_FETCHSAM              (1U<<1)
+#define IBS_CAPS_OPSAM                 (1U<<2)
+#define IBS_CAPS_RDWROPCNT             (1U<<3)
+#define IBS_CAPS_OPCNT                 (1U<<4)
+#define IBS_CAPS_BRNTRGT               (1U<<5)
+#define IBS_CAPS_OPCNTEXT              (1U<<6)
+
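+/*
+ * Caps assumed on CPUs that implement IBS but lack the cpuid feature
+ * leaf: fetch and op sampling are taken to be available.
+ */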
+#define IBS_CAPS_DEFAULT               (IBS_CAPS_AVAIL         \
+                                        | IBS_CAPS_FETCHSAM    \
+                                        | IBS_CAPS_OPSAM)
+
+/*
+ * IBS APIC setup
+ */
+#define IBSCTL                         0x1cc
+#define IBSCTL_LVT_OFFSET_VALID                (1ULL<<8)
+#define IBSCTL_LVT_OFFSET_MASK         0x0F
 
 /*
  * IBS randomization macros
@@ -92,12 +116,12 @@ static u32 get_ibs_caps(void)
        /* check IBS cpuid feature flags */
        max_level = cpuid_eax(0x80000000);
        if (max_level < IBS_CPUID_FEATURES)
-               return IBS_CAPS_AVAIL;
+               return IBS_CAPS_DEFAULT;
 
        ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
        if (!(ibs_caps & IBS_CAPS_AVAIL))
                /* cpuid flags not valid */
-               return IBS_CAPS_AVAIL;
+               return IBS_CAPS_DEFAULT;
 
        return ibs_caps;
 }
@@ -190,8 +214,8 @@ op_amd_handle_ibs(struct pt_regs * const regs,
                rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
                if (ctl & IBS_OP_VAL) {
                        rdmsrl(MSR_AMD64_IBSOPRIP, val);
-                       oprofile_write_reserve(&entry, regs, val,
-                                              IBS_OP_CODE, IBS_OP_SIZE);
+                       oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
+                                              ibs_state.sample_size);
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSOPDATA, val);
                        oprofile_add_data64(&entry, val);
@@ -203,10 +227,14 @@ op_amd_handle_ibs(struct pt_regs * const regs,
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
                        oprofile_add_data64(&entry, val);
+                       if (ibs_state.branch_target) {
+                               rdmsrl(MSR_AMD64_IBSBRTARGET, val);
+                               oprofile_add_data(&entry, (unsigned long)val);
+                       }
                        oprofile_write_commit(&entry);
 
                        /* reenable the IRQ */
-                       ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
+                       ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
                        wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
                }
        }
@@ -219,21 +247,32 @@ static inline void op_amd_start_ibs(void)
        if (!ibs_caps)
                return;
 
+       memset(&ibs_state, 0, sizeof(ibs_state));
+
+       /*
+        * Note: since the max count settings may be out of range, we
+        * write back the values actually used so that userland can
+        * read them.
+        */
+
        if (ibs_config.fetch_enabled) {
-               val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
+               val = ibs_config.max_cnt_fetch >> 4;
+               val = min(val, IBS_FETCH_MAX_CNT);
+               ibs_config.max_cnt_fetch = val << 4;
                val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
                val |= IBS_FETCH_ENABLE;
                wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
        }
 
        if (ibs_config.op_enabled) {
-               ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+               val = ibs_config.max_cnt_op >> 4;
                if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
                        /*
                         * IbsOpCurCnt not supported.  See
                         * op_amd_randomize_ibs_op() for details.
                         */
-                       ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
+                       val = clamp(val, 0x0081ULL, 0xFF80ULL);
+                       ibs_config.max_cnt_op = val << 4;
                } else {
                        /*
                         * The start value is randomized with a
@@ -241,13 +280,24 @@ static inline void op_amd_start_ibs(void)
                         * with the half of the randomized range. Also
                         * avoid underflows.
                         */
-                       ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
-                                        IBS_OP_MAX_CNT);
+                       val += IBS_RANDOM_MAXCNT_OFFSET;
+                       if (ibs_caps & IBS_CAPS_OPCNTEXT)
+                               val = min(val, IBS_OP_MAX_CNT_EXT);
+                       else
+                               val = min(val, IBS_OP_MAX_CNT);
+                       ibs_config.max_cnt_op =
+                               (val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
+               }
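+               /*
+                * Bits 15:0 of val go into IbsOpMaxCnt[15:0]; with
+                * IBS_CAPS_OPCNTEXT the remaining high count bits are
+                * shifted into IBSOPCTL bits 26:20.
+                */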
+               val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
+               val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+               val |= IBS_OP_ENABLE;
+               ibs_state.ibs_op_ctl = val;
+               ibs_state.sample_size = IBS_OP_SIZE;
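+               /* branch target sampling appends one extra data word per op sample */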
+               if (ibs_config.branch_target) {
+                       ibs_state.branch_target = 1;
+                       ibs_state.sample_size++;
                }
-               if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
-                       ibs_op_ctl |= IBS_OP_CNT_CTL;
-               ibs_op_ctl |= IBS_OP_ENABLE;
-               val = op_amd_randomize_ibs_op(ibs_op_ctl);
+               val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
                wrmsrl(MSR_AMD64_IBSOPCTL, val);
        }
 }
@@ -266,6 +316,70 @@ static void op_amd_stop_ibs(void)
                wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
 
+static inline int eilvt_is_available(int offset)
+{
+       /* check if we may assign a vector */
+       return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
+}
+
+static inline int ibs_eilvt_valid(void)
+{
+       int offset;
+       u64 val;
+
+       rdmsrl(MSR_AMD64_IBSCTL, val);
+       offset = val & IBSCTL_LVT_OFFSET_MASK;
+
+       if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
+               pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+               return 0;
+       }
+
+       if (!eilvt_is_available(offset)) {
+               pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+               return 0;
+       }
+
+       return 1;
+}
+
+static inline int get_ibs_offset(void)
+{
+       u64 val;
+
+       rdmsrl(MSR_AMD64_IBSCTL, val);
+       if (!(val & IBSCTL_LVT_OFFSET_VALID))
+               return -EINVAL;
+
+       return val & IBSCTL_LVT_OFFSET_MASK;
+}
+
+static void setup_APIC_ibs(void)
+{
+       int offset;
+
+       offset = get_ibs_offset();
+       if (offset < 0)
+               goto failed;
+
+       if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
+               return;
+failed:
+       pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n",
+               smp_processor_id());
+}
+
+static void clear_APIC_ibs(void)
+{
+       int offset;
+
+       offset = get_ibs_offset();
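+       /* mask the LVT entry again and restore fixed delivery mode */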
+       if (offset >= 0)
+               setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
+}
+
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 
 static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
@@ -275,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
        int i;
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -294,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 {
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!msrs->counters[i].addr)
                        continue;
                release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
@@ -306,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
 {
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; i++) {
+       for (i = 0; i < num_counters; i++) {
                if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
                        goto fail;
                if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
@@ -314,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
                        goto fail;
                }
                /* both registers must be reserved */
-               msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
-               msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+               if (num_counters == NUM_COUNTERS_F15H) {
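+                       /*
+                        * Family 15h core counters are interleaved
+                        * ctl/ctr MSR pairs, hence the stride of 2.
+                        */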
+                       msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
+                       msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
+               } else {
+                       msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+                       msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+               }
                continue;
        fail:
                if (!counter_config[i].enabled)
@@ -335,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        int i;
 
        /* setup reset_value */
-       for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
+       for (i = 0; i < OP_MAX_COUNTER; ++i) {
                if (counter_config[i].enabled
                    && msrs->counters[op_x86_virt_to_phys(i)].addr)
                        reset_value[i] = counter_config[i].count;
@@ -344,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        /* clear all counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!msrs->controls[i].addr)
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -360,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -376,13 +495,13 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        if (ibs_caps)
-               setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0);
+               setup_APIC_ibs();
 }
 
 static void op_amd_cpu_shutdown(void)
 {
        if (ibs_caps)
-               setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
+               clear_APIC_ibs();
 }
 
 static int op_amd_check_ctrs(struct pt_regs * const regs,
@@ -391,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
        u64 val;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -414,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
        u64 val;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -434,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
         * Subtle: stop on all counters to avoid race with setting our
         * pm callback
         */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -445,16 +564,11 @@ static void op_amd_stop(struct op_msrs const * const msrs)
        op_amd_stop_ibs();
 }
 
-static int __init_ibs_nmi(void)
+static int setup_ibs_ctl(int ibs_eilvt_off)
 {
-#define IBSCTL_LVTOFFSETVAL            (1 << 8)
-#define IBSCTL                         0x1cc
        struct pci_dev *cpu_cfg;
        int nodes;
        u32 value = 0;
-       u8 ibs_eilvt_off;
-
-       ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
 
        nodes = 0;
        cpu_cfg = NULL;
@@ -466,39 +580,87 @@ static int __init_ibs_nmi(void)
                        break;
                ++nodes;
                pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
-                                      | IBSCTL_LVTOFFSETVAL);
+                                      | IBSCTL_LVT_OFFSET_VALID);
                pci_read_config_dword(cpu_cfg, IBSCTL, &value);
-               if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
+               if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
                        pci_dev_put(cpu_cfg);
                        printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
-                               "IBSCTL = 0x%08x", value);
-                       return 1;
+                              "IBSCTL = 0x%08x\n", value);
+                       return -EINVAL;
                }
        } while (1);
 
        if (!nodes) {
-               printk(KERN_DEBUG "No CPU node configured for IBS");
-               return 1;
+               printk(KERN_DEBUG "No CPU node configured for IBS\n");
+               return -ENODEV;
        }
 
        return 0;
 }
 
-/* initialize the APIC for the IBS interrupts if available */
+static int force_ibs_eilvt_setup(void)
+{
+       int i;
+       int ret;
+
+       /* find the next free available EILVT entry */
+       for (i = 1; i < 4; i++) {
+               if (!eilvt_is_available(i))
+                       continue;
+               ret = setup_ibs_ctl(i);
+               if (ret)
+                       return ret;
+               pr_err(FW_BUG "using offset %d for IBS interrupts\n", i);
+               return 0;
+       }
+
+       printk(KERN_DEBUG "No EILVT entry available\n");
+
+       return -EBUSY;
+}
+
+static int __init_ibs_nmi(void)
+{
+       int ret;
+
+       if (ibs_eilvt_valid())
+               return 0;
+
+       ret = force_ibs_eilvt_setup();
+       if (ret)
+               return ret;
+
+       if (!ibs_eilvt_valid())
+               return -EFAULT;
+
+       pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
+
+       return 0;
+}
+
+/*
+ * check and reserve APIC extended interrupt LVT offset for IBS if
+ * available
+ *
+ * init_ibs() implicitly performs CPU-local operations, so pin this
+ * thread to its current CPU
+ */
+
 static void init_ibs(void)
 {
-       ibs_caps = get_ibs_caps();
+       preempt_disable();
 
+       ibs_caps = get_ibs_caps();
        if (!ibs_caps)
-               return;
+               goto out;
 
-       if (__init_ibs_nmi()) {
+       if (__init_ibs_nmi() < 0)
                ibs_caps = 0;
-               return;
-       }
+       else
+               printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
 
-       printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
-              (unsigned)ibs_caps);
+out:
+       preempt_enable();
 }
 
 static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
@@ -521,44 +683,60 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
        /* model specific files */
 
        /* setup some reasonable defaults */
+       memset(&ibs_config, 0, sizeof(ibs_config));
        ibs_config.max_cnt_fetch = 250000;
-       ibs_config.fetch_enabled = 0;
        ibs_config.max_cnt_op = 250000;
-       ibs_config.op_enabled = 0;
-       ibs_config.dispatched_ops = 0;
-
-       dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
-       oprofilefs_create_ulong(sb, dir, "enable",
-                               &ibs_config.fetch_enabled);
-       oprofilefs_create_ulong(sb, dir, "max_count",
-                               &ibs_config.max_cnt_fetch);
-       oprofilefs_create_ulong(sb, dir, "rand_enable",
-                               &ibs_config.rand_en);
-
-       dir = oprofilefs_mkdir(sb, root, "ibs_op");
-       oprofilefs_create_ulong(sb, dir, "enable",
-                               &ibs_config.op_enabled);
-       oprofilefs_create_ulong(sb, dir, "max_count",
-                               &ibs_config.max_cnt_op);
-       if (ibs_caps & IBS_CAPS_OPCNT)
-               oprofilefs_create_ulong(sb, dir, "dispatched_ops",
-                                       &ibs_config.dispatched_ops);
+
+       if (ibs_caps & IBS_CAPS_FETCHSAM) {
+               dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
+               oprofilefs_create_ulong(sb, dir, "enable",
+                                       &ibs_config.fetch_enabled);
+               oprofilefs_create_ulong(sb, dir, "max_count",
+                                       &ibs_config.max_cnt_fetch);
+               oprofilefs_create_ulong(sb, dir, "rand_enable",
+                                       &ibs_config.rand_en);
+       }
+
+       if (ibs_caps & IBS_CAPS_OPSAM) {
+               dir = oprofilefs_mkdir(sb, root, "ibs_op");
+               oprofilefs_create_ulong(sb, dir, "enable",
+                                       &ibs_config.op_enabled);
+               oprofilefs_create_ulong(sb, dir, "max_count",
+                                       &ibs_config.max_cnt_op);
+               if (ibs_caps & IBS_CAPS_OPCNT)
+                       oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+                                               &ibs_config.dispatched_ops);
+               if (ibs_caps & IBS_CAPS_BRNTRGT)
+                       oprofilefs_create_ulong(sb, dir, "branch_target",
+                                               &ibs_config.branch_target);
+       }
 
        return 0;
 }
 
+struct op_x86_model_spec op_amd_spec;
+
 static int op_amd_init(struct oprofile_operations *ops)
 {
        init_ibs();
        create_arch_files = ops->create_files;
        ops->create_files = setup_ibs_files;
+
+       if (boot_cpu_data.x86 == 0x15)
+               num_counters = NUM_COUNTERS_F15H;
+       else
+               num_counters = NUM_COUNTERS;
+
+       op_amd_spec.num_counters = num_counters;
+       op_amd_spec.num_controls = num_counters;
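+       /* NUM_VIRT_COUNTERS is 0 without multiplexing, so this follows num_counters */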
+       op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
+
        return 0;
 }
 
 struct op_x86_model_spec op_amd_spec = {
-       .num_counters           = NUM_COUNTERS,
-       .num_controls           = NUM_COUNTERS,
-       .num_virt_counters      = NUM_VIRT_COUNTERS,
+       /* num_counters/num_controls filled in at runtime */
        .reserved               = MSR_AMD_EVENTSEL_RESERVED,
        .event_mask             = OP_EVENT_MASK,
        .init                   = op_amd_init,