git.karo-electronics.de Git - linux-beck.git/commitdiff
Merge commit 'tip/perfcounters-for-linus' into oprofile/master
author: Robert Richter <robert.richter@amd.com>
Fri, 12 Jun 2009 15:58:48 +0000 (17:58 +0200)
committer: Robert Richter <robert.richter@amd.com>
Fri, 12 Jun 2009 15:58:48 +0000 (17:58 +0200)
Conflicts:
arch/x86/oprofile/op_model_ppro.c

Signed-off-by: Robert Richter <robert.richter@amd.com>
1  2 
arch/x86/oprofile/nmi_int.c
arch/x86/oprofile/op_model_ppro.c
arch/x86/oprofile/op_x86_model.h

index 80b63d5db50918b840677f96e952eeed63f7b8c2,b07dd8d0b321d9e04fd94b307c0f51befdc20933..7826dfcc842823a7a893b4e2831c34864597b1c7
@@@ -31,26 -31,6 +31,26 @@@ static DEFINE_PER_CPU(unsigned long, sa
  /* 0 == registered but off, 1 == registered and on */
  static int nmi_enabled = 0;
  
 +/* common functions */
 +
 +u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
 +                  struct op_counter_config *counter_config)
 +{
 +      u64 val = 0;
 +      u16 event = (u16)counter_config->event;
 +
 +      val |= ARCH_PERFMON_EVENTSEL_INT;
 +      val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
 +      val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
 +      val |= (counter_config->unit_mask & 0xFF) << 8;
 +      event &= model->event_mask ? model->event_mask : 0xFF;
 +      val |= event & 0xFF;
 +      val |= (event & 0x0F00) << 24;
 +
 +      return val;
 +}
 +
 +
  static int profile_exceptions_notify(struct notifier_block *self,
                                     unsigned long val, void *data)
  {
@@@ -60,8 -40,9 +60,9 @@@
  
        switch (val) {
        case DIE_NMI:
-               if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
-                       ret = NOTIFY_STOP;
+       case DIE_NMI_IPI:
+               model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
+               ret = NOTIFY_STOP;
                break;
        default:
                break;
  
  static void nmi_cpu_save_registers(struct op_msrs *msrs)
  {
 -      unsigned int const nr_ctrs = model->num_counters;
 -      unsigned int const nr_ctrls = model->num_controls;
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;
  
 -      for (i = 0; i < nr_ctrs; ++i) {
 -              if (counters[i].addr) {
 -                      rdmsr(counters[i].addr,
 -                              counters[i].saved.low,
 -                              counters[i].saved.high);
 -              }
 +      for (i = 0; i < model->num_counters; ++i) {
 +              if (counters[i].addr)
 +                      rdmsrl(counters[i].addr, counters[i].saved);
        }
  
 -      for (i = 0; i < nr_ctrls; ++i) {
 -              if (controls[i].addr) {
 -                      rdmsr(controls[i].addr,
 -                              controls[i].saved.low,
 -                              controls[i].saved.high);
 -              }
 +      for (i = 0; i < model->num_controls; ++i) {
 +              if (controls[i].addr)
 +                      rdmsrl(controls[i].addr, controls[i].saved);
        }
  }
  
@@@ -137,7 -126,7 +138,7 @@@ static void nmi_cpu_setup(void *dummy
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
        spin_lock(&oprofilefs_lock);
 -      model->setup_ctrs(msrs);
 +      model->setup_ctrs(model, msrs);
        spin_unlock(&oprofilefs_lock);
        per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
  static struct notifier_block profile_exceptions_nb = {
        .notifier_call = profile_exceptions_notify,
        .next = NULL,
-       .priority = 0
+       .priority = 2
  };
  
  static int nmi_setup(void)
  
  static void nmi_restore_registers(struct op_msrs *msrs)
  {
 -      unsigned int const nr_ctrs = model->num_counters;
 -      unsigned int const nr_ctrls = model->num_controls;
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;
  
 -      for (i = 0; i < nr_ctrls; ++i) {
 -              if (controls[i].addr) {
 -                      wrmsr(controls[i].addr,
 -                              controls[i].saved.low,
 -                              controls[i].saved.high);
 -              }
 +      for (i = 0; i < model->num_controls; ++i) {
 +              if (controls[i].addr)
 +                      wrmsrl(controls[i].addr, controls[i].saved);
        }
  
 -      for (i = 0; i < nr_ctrs; ++i) {
 -              if (counters[i].addr) {
 -                      wrmsr(counters[i].addr,
 -                              counters[i].saved.low,
 -                              counters[i].saved.high);
 -              }
 +      for (i = 0; i < model->num_counters; ++i) {
 +              if (counters[i].addr)
 +                      wrmsrl(counters[i].addr, counters[i].saved);
        }
  }
  
@@@ -431,7 -428,7 +432,7 @@@ static int __init ppro_init(char **cpu_
                *cpu_type = "i386/core_2";
                break;
        case 26:
 -              arch_perfmon_setup_counters();
 +              model = &op_arch_perfmon_spec;
                *cpu_type = "i386/core_i7";
                break;
        case 28:
        return 1;
  }
  
 -static int __init arch_perfmon_init(char **cpu_type)
 -{
 -      if (!cpu_has_arch_perfmon)
 -              return 0;
 -      *cpu_type = "i386/arch_perfmon";
 -      model = &op_arch_perfmon_spec;
 -      arch_perfmon_setup_counters();
 -      return 1;
 -}
 -
  /* in order to get sysfs right */
  static int using_nmi;
  
@@@ -464,26 -471,27 +465,26 @@@ int __init op_nmi_init(struct oprofile_
                /* Needs to be at least an Athlon (or hammer in 32bit mode) */
  
                switch (family) {
 -              default:
 -                      return -ENODEV;
                case 6:
 -                      model = &op_amd_spec;
                        cpu_type = "i386/athlon";
                        break;
                case 0xf:
 -                      model = &op_amd_spec;
 -                      /* Actually it could be i386/hammer too, but give
 -                       user space an consistent name. */
 +                      /*
 +                       * Actually it could be i386/hammer too, but
 +                       * give user space an consistent name.
 +                       */
                        cpu_type = "x86-64/hammer";
                        break;
                case 0x10:
 -                      model = &op_amd_spec;
                        cpu_type = "x86-64/family10";
                        break;
                case 0x11:
 -                      model = &op_amd_spec;
                        cpu_type = "x86-64/family11h";
                        break;
 +              default:
 +                      return -ENODEV;
                }
 +              model = &op_amd_spec;
                break;
  
        case X86_VENDOR_INTEL:
                        break;
                }
  
 -              if (!cpu_type && !arch_perfmon_init(&cpu_type))
 +              if (cpu_type)
 +                      break;
 +
 +              if (!cpu_has_arch_perfmon)
                        return -ENODEV;
 +
 +              /* use arch perfmon as fallback */
 +              cpu_type = "i386/arch_perfmon";
 +              model = &op_arch_perfmon_spec;
                break;
  
        default:
index 0a261a5c696e5bdad2b2c28c058916fffb666b67,4da7230b3d17138f7fcec0cf0b13f265c633f862..cd72d5c73b490c3cc4341c5f5c7eee4b1f076832
@@@ -10,7 -10,6 +10,7 @@@
   * @author Philippe Elie
   * @author Graydon Hoare
   * @author Andi Kleen
 + * @author Robert Richter <robert.richter@amd.com>
   */
  
  #include <linux/oprofile.h>
@@@ -19,6 -18,7 +19,6 @@@
  #include <asm/msr.h>
  #include <asm/apic.h>
  #include <asm/nmi.h>
 -#include <asm/perf_counter.h>
  
  #include "op_x86_model.h"
  #include "op_counter.h"
  static int num_counters = 2;
  static int counter_width = 32;
  
 -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
 -#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
 -
 -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
 -#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
 -#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
 -#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
 -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
 -#define CTRL_CLEAR(x) (x &= (1<<21))
 -#define CTRL_SET_ENABLE(val) (val |= 1<<20)
 -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
 -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
 -#define CTRL_SET_UM(val, m) (val |= (m << 8))
 -#define CTRL_SET_EVENT(val, e) (val |= e)
 +#define MSR_PPRO_EVENTSEL_RESERVED    ((0xFFFFFFFFULL<<32)|(1ULL<<21))
  
  static u64 *reset_value;
  
@@@ -50,10 -63,9 +50,10 @@@ static void ppro_fill_in_addresses(stru
  }
  
  
 -static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 +static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 +                          struct op_msrs const * const msrs)
  {
 -      unsigned int low, high;
 +      u64 val;
        int i;
  
        if (!reset_value) {
  
        /* clear all counters */
        for (i = 0 ; i < num_counters; ++i) {
 -              if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 +              if (unlikely(!msrs->controls[i].addr))
                        continue;
 -              CTRL_READ(low, high, msrs, i);
 -              CTRL_CLEAR(low);
 -              CTRL_WRITE(low, high, msrs, i);
 +              rdmsrl(msrs->controls[i].addr, val);
 +              val &= model->reserved;
 +              wrmsrl(msrs->controls[i].addr, val);
        }
  
        /* avoid a false detection of ctr overflows in NMI handler */
        for (i = 0; i < num_counters; ++i) {
 -              if (unlikely(!CTR_IS_RESERVED(msrs, i)))
 +              if (unlikely(!msrs->counters[i].addr))
                        continue;
                wrmsrl(msrs->counters[i].addr, -1LL);
        }
  
        /* enable active counters */
        for (i = 0; i < num_counters; ++i) {
 -              if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
 +              if (counter_config[i].enabled && msrs->counters[i].addr) {
                        reset_value[i] = counter_config[i].count;
 -
                        wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 -
 -                      CTRL_READ(low, high, msrs, i);
 -                      CTRL_CLEAR(low);
 -                      CTRL_SET_ENABLE(low);
 -                      CTRL_SET_USR(low, counter_config[i].user);
 -                      CTRL_SET_KERN(low, counter_config[i].kernel);
 -                      CTRL_SET_UM(low, counter_config[i].unit_mask);
 -                      CTRL_SET_EVENT(low, counter_config[i].event);
 -                      CTRL_WRITE(low, high, msrs, i);
 +                      rdmsrl(msrs->controls[i].addr, val);
 +                      val &= model->reserved;
 +                      val |= op_x86_get_ctrl(model, &counter_config[i]);
 +                      wrmsrl(msrs->controls[i].addr, val);
                } else {
                        reset_value[i] = 0;
                }
@@@ -118,16 -136,24 +118,24 @@@ static int ppro_check_ctrs(struct pt_re
        u64 val;
        int i;
  
+       /*
+        * This can happen if perf counters are in use when
+        * we steal the die notifier NMI.
+        */
+       if (unlikely(!reset_value))
+               goto out;
        for (i = 0 ; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsrl(msrs->counters[i].addr, val);
 -              if (CTR_OVERFLOWED(val)) {
 -                      oprofile_add_sample(regs, i);
 -                      wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 -              }
 +              if (val & (1ULL << (counter_width - 1)))
 +                      continue;
 +              oprofile_add_sample(regs, i);
 +              wrmsrl(msrs->counters[i].addr, -reset_value[i]);
        }
  
+ out:
        /* Only P6 based Pentium M need to re-unmask the apic vector but it
         * doesn't hurt other P6 variant */
        apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
  
  static void ppro_start(struct op_msrs const * const msrs)
  {
 -      unsigned int low, high;
 +      u64 val;
        int i;
  
        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (reset_value[i]) {
 -                      CTRL_READ(low, high, msrs, i);
 -                      CTRL_SET_ACTIVE(low);
 -                      CTRL_WRITE(low, high, msrs, i);
 +                      rdmsrl(msrs->controls[i].addr, val);
 +                      val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 +                      wrmsrl(msrs->controls[i].addr, val);
                }
        }
  }
  
  static void ppro_stop(struct op_msrs const * const msrs)
  {
 -      unsigned int low, high;
 +      u64 val;
        int i;
  
        if (!reset_value)
        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
 -              CTRL_READ(low, high, msrs, i);
 -              CTRL_SET_INACTIVE(low);
 -              CTRL_WRITE(low, high, msrs, i);
 +              rdmsrl(msrs->controls[i].addr, val);
 +              val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
 +              wrmsrl(msrs->controls[i].addr, val);
        }
  }
  
@@@ -181,11 -207,11 +189,11 @@@ static void ppro_shutdown(struct op_msr
        int i;
  
        for (i = 0 ; i < num_counters ; ++i) {
 -              if (CTR_IS_RESERVED(msrs, i))
 +              if (msrs->counters[i].addr)
                        release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
        }
        for (i = 0 ; i < num_counters ; ++i) {
 -              if (CTRL_IS_RESERVED(msrs, i))
 +              if (msrs->controls[i].addr)
                        release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
        }
        if (reset_value) {
  }
  
  
 -struct op_x86_model_spec op_ppro_spec = {
 -      .num_counters           = 2,    /* can be overriden */
 -      .num_controls           = 2,    /* dito */
 +struct op_x86_model_spec const op_ppro_spec = {
 +      .num_counters           = 2,
 +      .num_controls           = 2,
 +      .reserved               = MSR_PPRO_EVENTSEL_RESERVED,
        .fill_in_addresses      = &ppro_fill_in_addresses,
        .setup_ctrs             = &ppro_setup_ctrs,
        .check_ctrs             = &ppro_check_ctrs,
   * the specific CPU.
   */
  
 -void arch_perfmon_setup_counters(void)
 +static void arch_perfmon_setup_counters(void)
  {
        union cpuid10_eax eax;
  
  
        op_arch_perfmon_spec.num_counters = num_counters;
        op_arch_perfmon_spec.num_controls = num_counters;
 -      op_ppro_spec.num_counters = num_counters;
 -      op_ppro_spec.num_controls = num_counters;
 +}
 +
 +static int arch_perfmon_init(struct oprofile_operations *ignore)
 +{
 +      arch_perfmon_setup_counters();
 +      return 0;
  }
  
  struct op_x86_model_spec op_arch_perfmon_spec = {
 +      .reserved               = MSR_PPRO_EVENTSEL_RESERVED,
 +      .init                   = &arch_perfmon_init,
        /* num_counters/num_controls filled in at runtime */
        .fill_in_addresses      = &ppro_fill_in_addresses,
        /* user space does the cpuid check for available events */
index fda52b4c1b95bcc90d5aae1ad3bcfcbb22236d07,825e79064d64ebd87c71f74954a861e69d5abaae..505489873b9d31f95a356cc3f478c7290a79a082
@@@ -6,18 -6,19 +6,18 @@@
   * @remark Read the file COPYING
   *
   * @author Graydon Hoare
 + * @author Robert Richter <robert.richter@amd.com>
   */
  
  #ifndef OP_X86_MODEL_H
  #define OP_X86_MODEL_H
  
 -struct op_saved_msr {
 -      unsigned int high;
 -      unsigned int low;
 -};
 +#include <asm/types.h>
- #include <asm/intel_arch_perfmon.h>
++#include <asm/perf_counter.h>
  
  struct op_msr {
 -      unsigned long addr;
 -      struct op_saved_msr saved;
 +      unsigned long   addr;
 +      u64             saved;
  };
  
  struct op_msrs {
  
  struct pt_regs;
  
 +struct oprofile_operations;
 +
  /* The model vtable abstracts the differences between
   * various x86 CPU models' perfctr support.
   */
  struct op_x86_model_spec {
 -      int (*init)(struct oprofile_operations *ops);
 -      void (*exit)(void);
 -      unsigned int num_counters;
 -      unsigned int num_controls;
 -      void (*fill_in_addresses)(struct op_msrs * const msrs);
 -      void (*setup_ctrs)(struct op_msrs const * const msrs);
 -      int (*check_ctrs)(struct pt_regs * const regs,
 -              struct op_msrs const * const msrs);
 -      void (*start)(struct op_msrs const * const msrs);
 -      void (*stop)(struct op_msrs const * const msrs);
 -      void (*shutdown)(struct op_msrs const * const msrs);
 +      unsigned int    num_counters;
 +      unsigned int    num_controls;
 +      u64             reserved;
 +      u16             event_mask;
 +      int             (*init)(struct oprofile_operations *ops);
 +      void            (*exit)(void);
 +      void            (*fill_in_addresses)(struct op_msrs * const msrs);
 +      void            (*setup_ctrs)(struct op_x86_model_spec const *model,
 +                                    struct op_msrs const * const msrs);
 +      int             (*check_ctrs)(struct pt_regs * const regs,
 +                                    struct op_msrs const * const msrs);
 +      void            (*start)(struct op_msrs const * const msrs);
 +      void            (*stop)(struct op_msrs const * const msrs);
 +      void            (*shutdown)(struct op_msrs const * const msrs);
  };
  
 -extern struct op_x86_model_spec op_ppro_spec;
 +struct op_counter_config;
 +
 +extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
 +                         struct op_counter_config *counter_config);
 +
 +extern struct op_x86_model_spec const op_ppro_spec;
  extern struct op_x86_model_spec const op_p4_spec;
  extern struct op_x86_model_spec const op_p4_ht2_spec;
  extern struct op_x86_model_spec const op_amd_spec;
  extern struct op_x86_model_spec op_arch_perfmon_spec;
  
 -extern void arch_perfmon_setup_counters(void);
 -
  #endif /* OP_X86_MODEL_H */