Merge branch 'perf/urgent' into perf/core
author    Ingo Molnar <mingo@kernel.org>
Fri, 4 Oct 2013 07:59:13 +0000 (09:59 +0200)
committer Ingo Molnar <mingo@kernel.org>
Fri, 4 Oct 2013 07:59:13 +0000 (09:59 +0200)
Pick up the latest fixes before applying new patches.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/cpu/perf_event.h
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index cc16faae0538431073b01dfa0bd5c96ab4383ae1..ce84edeeae271c4a428e930d8f04852279f3556a 100644
@@ -163,6 +163,11 @@ struct cpu_hw_events {
        u64                             intel_ctrl_host_mask;
        struct perf_guest_switch_msr    guest_switch_msrs[X86_PMC_IDX_MAX];
 
+       /*
+        * Intel checkpoint mask
+        */
+       u64                             intel_cp_status;
+
        /*
         * manage shared (per-core, per-cpu) registers
         * used on Intel NHM/WSM/SNB
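
The new intel_cp_status field is a per-CPU bitmask with one bit per hardware
counter index: the bit is set when a checkpointed event is enabled on that
counter and cleared again on disable (see the intel_pmu_enable_event /
intel_pmu_disable_event hunks below). A minimal sketch of that bookkeeping
pattern, with hypothetical names, not taken from the patch:

	/* Sketch only: one status bit per counter index, as intel_cp_status
	 * is used in the hunks below. */
	static u64 cp_status;

	static void mark_checkpointed(int idx)
	{
		cp_status |= (1ull << idx);	/* counter idx runs a checkpointed event */
	}

	static void clear_checkpointed(int idx)
	{
		cp_status &= ~(1ull << idx);	/* counter idx is no longer checkpointed */
	}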
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f31a1655d1ff5bd602239e211b14bdd28d95a79a..36b5ab884c15662638bb93a837e05a2288c3ee04 100644
@@ -190,9 +190,9 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
        EVENT_EXTRA_END
 };
 
-EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+EVENT_ATTR_STR(mem-loads,      mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
 
 struct attribute *nhm_events_attrs[] = {
        EVENT_PTR(mem_ld_nhm),
@@ -1184,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
        wrmsrl(hwc->config_base, ctrl_val);
 }
 
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+       return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
 static void intel_pmu_disable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
@@ -1197,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 
        cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
        cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+       cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
        /*
         * must disable before any actual event
@@ -1271,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
        if (event->attr.exclude_guest)
                cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
 
+       if (unlikely(event_is_checkpointed(event)))
+               cpuc->intel_cp_status |= (1ull << hwc->idx);
+
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                intel_pmu_enable_fixed(hwc);
                return;
@@ -1289,6 +1298,17 @@ static void intel_pmu_enable_event(struct perf_event *event)
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
        x86_perf_event_update(event);
+       /*
+        * For a checkpointed counter always reset back to 0.  This
+        * avoids a situation where the counter overflows, aborts the
+        * transaction and is then set back to shortly before the
+        * overflow, and overflows and aborts again.
+        */
+       if (unlikely(event_is_checkpointed(event))) {
+               /* No race with NMIs because the counter should not be armed */
+               wrmsrl(event->hw.event_base, 0);
+               local64_set(&event->hw.prev_count, 0);
+       }
        return x86_perf_event_set_period(event);
 }
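
The reset to 0 above is best read against the normal reload done by
x86_perf_event_set_period(), which writes the counter to roughly -period so it
overflows after 'period' further events; after a TSX abort the hardware
restores the checkpointed value, so a near-overflow checkpoint would overflow
and abort again immediately. A rough sketch of the conventional reload value
(illustration only, assuming a counter width of cntval_bits):

	/* Sketch, not kernel code: the usual set-period reload, shown only to
	 * contrast with the reset-to-0 used for checkpointed counters above. */
	static u64 reload_value(u64 period, unsigned int cntval_bits)
	{
		/* write -period truncated to the counter width, so the counter
		 * overflows after 'period' more events */
		return (u64)(-(s64)period) & ((1ULL << cntval_bits) - 1);
	}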
 
@@ -1372,6 +1392,13 @@ again:
                x86_pmu.drain_pebs(regs);
        }
 
+       /*
+        * Checkpointed counters can lead to 'spurious' PMIs because the
+        * rollback caused by the PMI will have cleared the overflow status
+        * bit. Therefore always force probe these counters.
+        */
+       status |= cpuc->intel_cp_status;
+
        for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_event *event = cpuc->events[bit];
 
@@ -1837,6 +1864,20 @@ static int hsw_hw_config(struct perf_event *event)
              event->attr.precise_ip > 0))
                return -EOPNOTSUPP;
 
+       if (event_is_checkpointed(event)) {
+               /*
+                * Sampling of checkpointed events can cause situations where
+                * the CPU constantly aborts because of an overflow, which is
+                * then checkpointed back and ignored. Forbid checkpointing
+                * for sampling.
+                *
+                * But still allow a long sampling period, so that perf stat
+                * from KVM works.
+                */
+               if (event->attr.sample_period > 0 &&
+                   event->attr.sample_period < 0x7fffffff)
+                       return -EOPNOTSUPP;
+       }
        return 0;
 }
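
Because sampling is rejected here, checkpointed events are effectively
counting-only from user space: open the event without a sample_period (or with
one of at least 0x7fffffff). A hedged perf_event_open() sketch; the raw event
0x3c (core cycles) comes from the cycles-ct alias added below, while the
in_tx/in_tx_cp bit positions (config bits 32 and 33) are assumed from the
Haswell format attributes:

	/* User-space sketch (assumption: in_tx/in_tx_cp map to config bits 32/33). */
	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int open_cycles_ct(void)
	{
		struct perf_event_attr attr = {
			.type	= PERF_TYPE_RAW,
			.size	= sizeof(attr),
			.config	= 0x3c | (1ULL << 32) | (1ULL << 33),
			/* no .sample_period: counting mode, which hsw_hw_config()
			 * accepts; a short sample_period would get -EOPNOTSUPP */
		};

		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}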
 
@@ -2182,10 +2223,36 @@ static __init void intel_nehalem_quirk(void)
        }
 }
 
-EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start,       tx_start,       "event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit,      tx_commit,      "event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort,       tx_abort,       "event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity,    tx_capacity,    "event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict,    tx_conflict,    "event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start,       el_start,       "event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit,      el_commit,      "event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort,       el_abort,       "event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity,    el_capacity,    "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict,    el_conflict,    "event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t,       cycles_t,       "event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct,      cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
 
 static struct attribute *hsw_events_attrs[] = {
+       EVENT_PTR(tx_start),
+       EVENT_PTR(tx_commit),
+       EVENT_PTR(tx_abort),
+       EVENT_PTR(tx_capacity),
+       EVENT_PTR(tx_conflict),
+       EVENT_PTR(el_start),
+       EVENT_PTR(el_commit),
+       EVENT_PTR(el_abort),
+       EVENT_PTR(el_capacity),
+       EVENT_PTR(el_conflict),
+       EVENT_PTR(cycles_t),
+       EVENT_PTR(cycles_ct),
        EVENT_PTR(mem_ld_hsw),
        EVENT_PTR(mem_st_hsw),
        NULL
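
Once hsw_events_attrs is registered, these aliases are exported under
/sys/bus/event_source/devices/cpu/events/ and can be used by name, for example
"perf stat -e cpu/tx-start/,cpu/tx-abort/,cpu/cycles-t/,cpu/cycles-ct/ -a sleep 1"
(a usage sketch; the tx-*/el-* events are only meaningful on CPUs with TSX).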
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ab3ba1c1b7dd2c425dd5edf4c2b5091d76355b34..07d9a052ee72dfd17845731cdccacff1f72b778d 100644
@@ -182,16 +182,28 @@ struct pebs_record_nhm {
  * Same as pebs_record_nhm, with two additional fields.
  */
 struct pebs_record_hsw {
-       struct pebs_record_nhm nhm;
-       /*
-        * Real IP of the event. In the Intel documentation this
-        * is called eventingrip.
-        */
-       u64 real_ip;
-       /*
-        * TSX tuning information field: abort cycles and abort flags.
-        */
-       u64 tsx_tuning;
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+       u64 status, dla, dse, lat;
+       u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+       struct {
+               u32 cycles_last_block     : 32,
+                   hle_abort             : 1,
+                   rtm_abort             : 1,
+                   instruction_abort     : 1,
+                   non_instruction_abort : 1,
+                   retry                 : 1,
+                   data_conflict         : 1,
+                   capacity_writes       : 1,
+                   capacity_reads        : 1;
+       };
+       u64         value;
 };
 
 void init_debug_store_on_cpu(int cpu)
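
The hsw_tsx_tuning union overlays the raw tsx_tuning field of a fmt2 PEBS
record: the low 32 bits hold the cycle count of the last transactional block
(used by intel_hsw_weight() below) and the following bits are abort-cause
flags. A decode sketch for one of those flags, illustration only and assuming
the usual little-endian bitfield layout:

	/* Illustration only (not in the patch): check the RTM-abort flag of a
	 * raw tsx_tuning value using the union above. */
	static inline int tsx_was_rtm_abort(u64 raw_tsx_tuning)
	{
		union hsw_tsx_tuning t = { .value = raw_tsx_tuning };

		return t.rtm_abort;
	}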
@@ -786,16 +798,24 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
        return 0;
 }
 
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+       if (pebs->tsx_tuning) {
+               union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+               return tsx.cycles_last_block;
+       }
+       return 0;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
                                   struct pt_regs *iregs, void *__pebs)
 {
        /*
-        * We cast to pebs_record_nhm to get the load latency data
-        * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+        * We cast to the biggest pebs_record but are careful not to
+        * unconditionally access the 'extra' entries.
         */
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct pebs_record_nhm *pebs = __pebs;
-       struct pebs_record_hsw *pebs_hsw = __pebs;
+       struct pebs_record_hsw *pebs = __pebs;
        struct perf_sample_data data;
        struct pt_regs regs;
        u64 sample_type;
@@ -854,7 +874,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        regs.sp = pebs->sp;
 
        if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-               regs.ip = pebs_hsw->real_ip;
+               regs.ip = pebs->real_ip;
                regs.flags |= PERF_EFLAGS_EXACT;
        } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
                regs.flags |= PERF_EFLAGS_EXACT;
@@ -862,9 +882,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
                regs.flags &= ~PERF_EFLAGS_EXACT;
 
        if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
-               x86_pmu.intel_cap.pebs_format >= 1)
+           x86_pmu.intel_cap.pebs_format >= 1)
                data.addr = pebs->dla;
 
+       /* Only set the TSX weight when no memory weight was requested. */
+       if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll &&
+           (x86_pmu.intel_cap.pebs_format >= 2))
+               data.weight = intel_hsw_weight(pebs);
+
        if (has_branch_stack(event))
                data.br_stack = &cpuc->lbr_stack;
 
@@ -913,17 +938,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
        __intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
-                                       void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event = NULL;
+       void *at, *top;
        u64 status = 0;
        int bit;
 
+       if (!x86_pmu.pebs_active)
+               return;
+
+       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
        ds->pebs_index = ds->pebs_buffer_base;
 
+       if (unlikely(at > top))
+               return;
+
+       /*
+        * Should not happen, we program the threshold at 1 and do not
+        * set a reset value.
+        */
+       WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+                 "Unexpected number of pebs records %ld\n",
+                 (long)(top - at) / x86_pmu.pebs_record_size);
+
        for (; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;
 
@@ -951,61 +993,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
        }
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct pebs_record_nhm *at, *top;
-       int n;
-
-       if (!x86_pmu.pebs_active)
-               return;
-
-       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-       ds->pebs_index = ds->pebs_buffer_base;
-
-       n = top - at;
-       if (n <= 0)
-               return;
-
-       /*
-        * Should not happen, we program the threshold at 1 and do not
-        * set a reset value.
-        */
-       WARN_ONCE(n > x86_pmu.max_pebs_events,
-                 "Unexpected number of pebs records %d\n", n);
-
-       return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct pebs_record_hsw *at, *top;
-       int n;
-
-       if (!x86_pmu.pebs_active)
-               return;
-
-       at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
-       n = top - at;
-       if (n <= 0)
-               return;
-       /*
-        * Should not happen, we program the threshold at 1 and do not
-        * set a reset value.
-        */
-       WARN_ONCE(n > x86_pmu.max_pebs_events,
-                 "Unexpected number of pebs records %d\n", n);
-
-       return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
 /*
  * BTS, PEBS probe and setup
  */
@@ -1040,7 +1027,7 @@ void intel_ds_init(void)
                case 2:
                        pr_cont("PEBS fmt2%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;
 
                default:
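
With this change the fmt2 (Haswell) case reuses intel_pmu_drain_pebs_nhm(): the
drain loop above steps through the DS area in units of the runtime
x86_pmu.pebs_record_size instead of depending on a compile-time record struct,
so one routine covers both the NHM and HSW layouts. A minimal sketch of that
pattern (hypothetical helper; void-pointer arithmetic as used in kernel GNU C):

	/* Sketch only: walk fixed-size records whose size is chosen at runtime,
	 * as the unified drain path does via x86_pmu.pebs_record_size. */
	static void walk_pebs_records(void *base, void *top, unsigned long record_size,
				      void (*handle)(void *record))
	{
		void *at;

		for (at = base; at < top; at += record_size)
			handle(at);
	}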