Merge branch 'perf/urgent' into tools/perf/build
author Ingo Molnar <mingo@kernel.org>
Tue, 8 Oct 2013 09:51:31 +0000 (11:51 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 8 Oct 2013 09:51:31 +0000 (11:51 +0200)
30 files changed:
arch/x86/kernel/cpu/perf_event.h
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/kernel/cpu/perf_event_intel_lbr.c
include/linux/perf_event.h
include/uapi/linux/perf_event.h
kernel/events/core.c
kernel/sysctl.c
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/builtin-annotate.c
tools/perf/builtin-diff.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/perf.h
tools/perf/tests/hists_link.c
tools/perf/util/event.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/session.c
tools/perf/util/sort.c
tools/perf/util/sort.h

index cc16faae0538431073b01dfa0bd5c96ab4383ae1..fd00bb29425d4da50194241c6ae3835775e12e6e 100644 (file)
@@ -163,6 +163,11 @@ struct cpu_hw_events {
        u64                             intel_ctrl_host_mask;
        struct perf_guest_switch_msr    guest_switch_msrs[X86_PMC_IDX_MAX];
 
+       /*
+        * Intel checkpoint mask
+        */
+       u64                             intel_cp_status;
+
        /*
         * manage shared (per-core, per-cpu) registers
         * used on Intel NHM/WSM/SNB
@@ -440,6 +445,7 @@ struct x86_pmu {
        int             lbr_nr;                    /* hardware stack size */
        u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
        const int       *lbr_sel_map;              /* lbr_select mappings */
+       bool            lbr_double_abort;          /* duplicated lbr aborts */
 
        /*
         * Extra registers for events
index f31a1655d1ff5bd602239e211b14bdd28d95a79a..0fa4f242f0504ad53297360966e957b84a0edab8 100644 (file)
@@ -190,9 +190,9 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
        EVENT_EXTRA_END
 };
 
-EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+EVENT_ATTR_STR(mem-loads,      mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
 
 struct attribute *nhm_events_attrs[] = {
        EVENT_PTR(mem_ld_nhm),
@@ -1184,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
        wrmsrl(hwc->config_base, ctrl_val);
 }
 
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+       return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
 static void intel_pmu_disable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
@@ -1197,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 
        cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
        cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+       cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
        /*
         * must disable before any actual event
@@ -1271,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
        if (event->attr.exclude_guest)
                cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
 
+       if (unlikely(event_is_checkpointed(event)))
+               cpuc->intel_cp_status |= (1ull << hwc->idx);
+
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                intel_pmu_enable_fixed(hwc);
                return;
@@ -1289,6 +1298,17 @@ static void intel_pmu_enable_event(struct perf_event *event)
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
        x86_perf_event_update(event);
+       /*
+        * For a checkpointed counter always reset back to 0.  This
+        * avoids a situation where the counter overflows, aborts the
+        * transaction and is then set back to shortly before the
+        * overflow, and overflows and aborts again.
+        */
+       if (unlikely(event_is_checkpointed(event))) {
+               /* No race with NMIs because the counter should not be armed */
+               wrmsrl(event->hw.event_base, 0);
+               local64_set(&event->hw.prev_count, 0);
+       }
        return x86_perf_event_set_period(event);
 }
 
@@ -1372,6 +1392,13 @@ again:
                x86_pmu.drain_pebs(regs);
        }
 
+       /*
+        * Checkpointed counters can lead to 'spurious' PMIs because the
+        * rollback caused by the PMI will have cleared the overflow status
+        * bit. Therefore always force probe these counters.
+        */
+       status |= cpuc->intel_cp_status;
+
        for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_event *event = cpuc->events[bit];
 
@@ -1837,6 +1864,20 @@ static int hsw_hw_config(struct perf_event *event)
              event->attr.precise_ip > 0))
                return -EOPNOTSUPP;
 
+       if (event_is_checkpointed(event)) {
+               /*
+                * Sampling of checkpointed events can cause situations where
+                * the CPU constantly aborts because of an overflow, which is
+                * then checkpointed back and ignored. Forbid checkpointing
+                * for sampling.
+                *
+                * But still allow a long sampling period, so that perf stat
+                * from KVM works.
+                */
+               if (event->attr.sample_period > 0 &&
+                   event->attr.sample_period < 0x7fffffff)
+                       return -EOPNOTSUPP;
+       }
        return 0;
 }
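
The hunk above rejects sampling on checkpointed events but still permits pure
counting (or a very large period), which is what perf stat needs. A minimal
user-space sketch of opening such a counter follows; it mirrors the cycles-ct
alias added further down (event=0x3c,in_tx=1,in_tx_cp=1). The in_tx/in_tx_cp
raw-config bit positions (32 and 33) are not part of this diff and are an
assumption here, as is Haswell-only availability.

    #include <linux/perf_event.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            struct perf_event_attr attr;
            long long count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            /* event=0x3c plus the (assumed) in_tx/in_tx_cp config bits */
            attr.config = 0x3c | (1ULL << 32) | (1ULL << 33);
            /* no sample_period: counting mode only, per hsw_hw_config() */

            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }
            /* ... run a transactional workload here ... */
            if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
                    printf("checkpointed in-TX cycles: %lld\n", count);
            close(fd);
            return 0;
    }
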
 
@@ -2182,10 +2223,36 @@ static __init void intel_nehalem_quirk(void)
        }
 }
 
-EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start,       tx_start,       "event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit,      tx_commit,      "event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort,       tx_abort,       "event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity,    tx_capacity,    "event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict,    tx_conflict,    "event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start,       el_start,       "event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit,      el_commit,      "event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort,       el_abort,       "event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity,    el_capacity,    "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict,    el_conflict,    "event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t,       cycles_t,       "event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct,      cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
 
 static struct attribute *hsw_events_attrs[] = {
+       EVENT_PTR(tx_start),
+       EVENT_PTR(tx_commit),
+       EVENT_PTR(tx_abort),
+       EVENT_PTR(tx_capacity),
+       EVENT_PTR(tx_conflict),
+       EVENT_PTR(el_start),
+       EVENT_PTR(el_commit),
+       EVENT_PTR(el_abort),
+       EVENT_PTR(el_capacity),
+       EVENT_PTR(el_conflict),
+       EVENT_PTR(cycles_t),
+       EVENT_PTR(cycles_ct),
        EVENT_PTR(mem_ld_hsw),
        EVENT_PTR(mem_st_hsw),
        NULL
@@ -2452,6 +2519,7 @@ __init int intel_pmu_init(void)
                x86_pmu.hw_config = hsw_hw_config;
                x86_pmu.get_event_constraints = hsw_get_event_constraints;
                x86_pmu.cpu_events = hsw_events_attrs;
+               x86_pmu.lbr_double_abort = true;
                pr_cont("Haswell events, ");
                break;
 
index ab3ba1c1b7dd2c425dd5edf4c2b5091d76355b34..32e9ed81cd00e4717f9a3aa27e38ee7a4fe37ad6 100644 (file)
@@ -182,18 +182,32 @@ struct pebs_record_nhm {
  * Same as pebs_record_nhm, with two additional fields.
  */
 struct pebs_record_hsw {
-       struct pebs_record_nhm nhm;
-       /*
-        * Real IP of the event. In the Intel documentation this
-        * is called eventingrip.
-        */
-       u64 real_ip;
-       /*
-        * TSX tuning information field: abort cycles and abort flags.
-        */
-       u64 tsx_tuning;
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+       u64 status, dla, dse, lat;
+       u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+       struct {
+               u32 cycles_last_block     : 32,
+                   hle_abort             : 1,
+                   rtm_abort             : 1,
+                   instruction_abort     : 1,
+                   non_instruction_abort : 1,
+                   retry                 : 1,
+                   data_conflict         : 1,
+                   capacity_writes       : 1,
+                   capacity_reads        : 1;
+       };
+       u64         value;
 };
 
+#define PEBS_HSW_TSX_FLAGS     0xff00000000ULL
+
 void init_debug_store_on_cpu(int cpu)
 {
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -786,16 +800,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
        return 0;
 }
 
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+       if (pebs->tsx_tuning) {
+               union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+               return tsx.cycles_last_block;
+       }
+       return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+{
+       u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+       /* For RTM XABORTs also log the abort code from AX */
+       if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+               txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+       return txn;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
                                   struct pt_regs *iregs, void *__pebs)
 {
        /*
-        * We cast to pebs_record_nhm to get the load latency data
-        * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+        * We cast to the biggest pebs_record but are careful not to
+        * unconditionally access the 'extra' entries.
         */
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct pebs_record_nhm *pebs = __pebs;
-       struct pebs_record_hsw *pebs_hsw = __pebs;
+       struct pebs_record_hsw *pebs = __pebs;
        struct perf_sample_data data;
        struct pt_regs regs;
        u64 sample_type;
@@ -854,7 +886,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        regs.sp = pebs->sp;
 
        if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-               regs.ip = pebs_hsw->real_ip;
+               regs.ip = pebs->real_ip;
                regs.flags |= PERF_EFLAGS_EXACT;
        } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
                regs.flags |= PERF_EFLAGS_EXACT;
@@ -862,9 +894,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
                regs.flags &= ~PERF_EFLAGS_EXACT;
 
        if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
-               x86_pmu.intel_cap.pebs_format >= 1)
+           x86_pmu.intel_cap.pebs_format >= 1)
                data.addr = pebs->dla;
 
+       if (x86_pmu.intel_cap.pebs_format >= 2) {
+               /* Only set the TSX weight when no memory weight. */
+               if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+                       data.weight = intel_hsw_weight(pebs);
+
+               if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
+                       data.txn = intel_hsw_transaction(pebs);
+       }
+
        if (has_branch_stack(event))
                data.br_stack = &cpuc->lbr_stack;
 
@@ -913,17 +954,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
        __intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
-                                       void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event = NULL;
+       void *at, *top;
        u64 status = 0;
        int bit;
 
+       if (!x86_pmu.pebs_active)
+               return;
+
+       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
        ds->pebs_index = ds->pebs_buffer_base;
 
+       if (unlikely(at > top))
+               return;
+
+       /*
+        * Should not happen, we program the threshold at 1 and do not
+        * set a reset value.
+        */
+       WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+                 "Unexpected number of pebs records %ld\n",
+                 (long)(top - at) / x86_pmu.pebs_record_size);
+
        for (; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;
 
@@ -951,61 +1009,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
        }
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct pebs_record_nhm *at, *top;
-       int n;
-
-       if (!x86_pmu.pebs_active)
-               return;
-
-       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-       ds->pebs_index = ds->pebs_buffer_base;
-
-       n = top - at;
-       if (n <= 0)
-               return;
-
-       /*
-        * Should not happen, we program the threshold at 1 and do not
-        * set a reset value.
-        */
-       WARN_ONCE(n > x86_pmu.max_pebs_events,
-                 "Unexpected number of pebs records %d\n", n);
-
-       return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct pebs_record_hsw *at, *top;
-       int n;
-
-       if (!x86_pmu.pebs_active)
-               return;
-
-       at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
-       n = top - at;
-       if (n <= 0)
-               return;
-       /*
-        * Should not happen, we program the threshold at 1 and do not
-        * set a reset value.
-        */
-       WARN_ONCE(n > x86_pmu.max_pebs_events,
-                 "Unexpected number of pebs records %d\n", n);
-
-       return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
 /*
  * BTS, PEBS probe and setup
  */
@@ -1040,7 +1043,7 @@ void intel_ds_init(void)
                case 2:
                        pr_cont("PEBS fmt2%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;
 
                default:
index d5be06a5005e99eb9ac8dbe808f565013bddc929..90ee6c1d0542664dbb9955ff2570e9f338e076e6 100644 (file)
@@ -284,6 +284,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
        int lbr_format = x86_pmu.intel_cap.lbr_format;
        u64 tos = intel_pmu_lbr_tos();
        int i;
+       int out = 0;
 
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
@@ -306,15 +307,27 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
                }
                from = (u64)((((s64)from) << skip) >> skip);
 
-               cpuc->lbr_entries[i].from       = from;
-               cpuc->lbr_entries[i].to         = to;
-               cpuc->lbr_entries[i].mispred    = mis;
-               cpuc->lbr_entries[i].predicted  = pred;
-               cpuc->lbr_entries[i].in_tx      = in_tx;
-               cpuc->lbr_entries[i].abort      = abort;
-               cpuc->lbr_entries[i].reserved   = 0;
+               /*
+                * Some CPUs report duplicated abort records,
+                * with the second entry not having an abort bit set.
+                * Skip them here. This loop runs backwards,
+                * so we need to undo the previous record.
+                * If the abort just happened outside the window
+                * the extra entry cannot be removed.
+                */
+               if (abort && x86_pmu.lbr_double_abort && out > 0)
+                       out--;
+
+               cpuc->lbr_entries[out].from      = from;
+               cpuc->lbr_entries[out].to        = to;
+               cpuc->lbr_entries[out].mispred   = mis;
+               cpuc->lbr_entries[out].predicted = pred;
+               cpuc->lbr_entries[out].in_tx     = in_tx;
+               cpuc->lbr_entries[out].abort     = abort;
+               cpuc->lbr_entries[out].reserved  = 0;
+               out++;
        }
-       cpuc->lbr_stack.nr = i;
+       cpuc->lbr_stack.nr = out;
 }
 
 void intel_pmu_lbr_read(void)
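
The compaction loop above drops the duplicated abort records some CPUs emit
(gated in the kernel by the new lbr_double_abort flag). A self-contained sketch
of the same filtering over a plain array, with a deliberately simplified entry
layout, may make the backwards-undo step easier to see:

    #include <stdio.h>

    struct lbr_entry {
            unsigned long long from, to;
            int abort;
    };

    static int compact_double_aborts(const struct lbr_entry *in, int nr,
                                     struct lbr_entry *out_buf)
    {
            int i, out = 0;

            for (i = 0; i < nr; i++) {
                    /*
                     * The walk goes from the most recent branch backwards in
                     * time, so a duplicated abort shows up as an abort entry
                     * following the newer duplicate without the abort bit:
                     * undo the previously copied record.
                     */
                    if (in[i].abort && out > 0)
                            out--;
                    out_buf[out++] = in[i];
            }
            return out;
    }

    int main(void)
    {
            /* newest-first: the first entry is the spurious duplicate */
            struct lbr_entry in[] = {
                    { 0x1000, 0x2000, 0 },  /* duplicate, no abort bit */
                    { 0x1000, 0x2000, 1 },  /* the real abort record   */
                    { 0x3000, 0x4000, 0 },
            };
            struct lbr_entry out[3];

            /* prints "2 entries kept": the abort overwrote its duplicate */
            printf("%d entries kept\n", compact_double_aborts(in, 3, out));
            return 0;
    }
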
index c8ba627c1d608733b8480bb929d9e81d97e57fa0..2e069d1288df5f04e6982243d7cac0f7455cd31d 100644 (file)
@@ -584,6 +584,10 @@ struct perf_sample_data {
        struct perf_regs_user           regs_user;
        u64                             stack_user_size;
        u64                             weight;
+       /*
+        * Transaction flags for abort events:
+        */
+       u64                             txn;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -599,6 +603,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
        data->stack_user_size = 0;
        data->weight = 0;
        data->data_src.val = 0;
+       data->txn = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
index 009a655a5d354c51e20fb34cb12ca653e94f9b71..da48837d617d0301609b9eca0c08687c1b27b1e1 100644 (file)
@@ -136,8 +136,9 @@ enum perf_event_sample_format {
        PERF_SAMPLE_WEIGHT                      = 1U << 14,
        PERF_SAMPLE_DATA_SRC                    = 1U << 15,
        PERF_SAMPLE_IDENTIFIER                  = 1U << 16,
+       PERF_SAMPLE_TRANSACTION                 = 1U << 17,
 
-       PERF_SAMPLE_MAX = 1U << 17,             /* non-ABI */
+       PERF_SAMPLE_MAX = 1U << 18,             /* non-ABI */
 };
 
 /*
@@ -180,6 +181,28 @@ enum perf_sample_regs_abi {
        PERF_SAMPLE_REGS_ABI_64         = 2,
 };
 
+/*
+ * Values for the memory transaction event qualifier, mostly for
+ * abort events. Multiple bits can be set.
+ */
+enum {
+       PERF_TXN_ELISION        = (1 << 0), /* From elision */
+       PERF_TXN_TRANSACTION    = (1 << 1), /* From transaction */
+       PERF_TXN_SYNC           = (1 << 2), /* Instruction is related */
+       PERF_TXN_ASYNC          = (1 << 3), /* Instruction not related */
+       PERF_TXN_RETRY          = (1 << 4), /* Retry possible */
+       PERF_TXN_CONFLICT       = (1 << 5), /* Conflict abort */
+       PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+       PERF_TXN_CAPACITY_READ  = (1 << 7), /* Capacity read abort */
+
+       PERF_TXN_MAX            = (1 << 8), /* non-ABI */
+
+       /* bits 32..63 are reserved for the abort code */
+
+       PERF_TXN_ABORT_MASK  = (0xffffffffULL << 32),
+       PERF_TXN_ABORT_SHIFT = 32,
+};
+
 /*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
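
For consumers of the new PERF_SAMPLE_TRANSACTION word, a small illustrative
decoder built only from the PERF_TXN_* values defined above. The constants are
repeated locally so the sketch is self-contained, and the sample value in
main() is made up:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PERF_TXN_ELISION        (1 << 0)
    #define PERF_TXN_TRANSACTION    (1 << 1)
    #define PERF_TXN_SYNC           (1 << 2)
    #define PERF_TXN_ASYNC          (1 << 3)
    #define PERF_TXN_RETRY          (1 << 4)
    #define PERF_TXN_CONFLICT       (1 << 5)
    #define PERF_TXN_CAPACITY_WRITE (1 << 6)
    #define PERF_TXN_CAPACITY_READ  (1 << 7)
    #define PERF_TXN_ABORT_MASK     (0xffffffffULL << 32)
    #define PERF_TXN_ABORT_SHIFT    32

    static void print_txn(uint64_t txn)
    {
            if (txn & PERF_TXN_TRANSACTION)
                    printf("RTM ");
            if (txn & PERF_TXN_ELISION)
                    printf("HLE ");
            if (txn & PERF_TXN_SYNC)
                    printf("sync-abort ");
            if (txn & PERF_TXN_ASYNC)
                    printf("async-abort ");
            if (txn & PERF_TXN_RETRY)
                    printf("retry-possible ");
            if (txn & PERF_TXN_CONFLICT)
                    printf("conflict ");
            if (txn & (PERF_TXN_CAPACITY_WRITE | PERF_TXN_CAPACITY_READ))
                    printf("capacity ");
            /* bits 32..63 carry the XABORT code logged from AX, if any */
            if (txn & PERF_TXN_ABORT_MASK)
                    printf("abort-code=%" PRIx64,
                           (txn & PERF_TXN_ABORT_MASK) >> PERF_TXN_ABORT_SHIFT);
            printf("\n");
    }

    int main(void)
    {
            /* synthetic value: RTM abort, synchronous, XABORT code 0xf */
            print_txn(PERF_TXN_TRANSACTION | PERF_TXN_SYNC |
                      (0xfULL << PERF_TXN_ABORT_SHIFT));
            return 0;
    }
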
index d49a9d29334cc4d67c24bad9814221a0371a6350..c716385f64833c2ca1e9f43526d4d9163c1db150 100644 (file)
@@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
-       int ret = proc_dointvec(table, write, buffer, lenp, ppos);
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
        if (ret || !write)
                return ret;
@@ -1201,6 +1201,9 @@ static void perf_event__header_size(struct perf_event *event)
        if (sample_type & PERF_SAMPLE_DATA_SRC)
                size += sizeof(data->data_src.val);
 
+       if (sample_type & PERF_SAMPLE_TRANSACTION)
+               size += sizeof(data->txn);
+
        event->header_size = size;
 }
 
@@ -4572,6 +4575,9 @@ void perf_output_sample(struct perf_output_handle *handle,
        if (sample_type & PERF_SAMPLE_DATA_SRC)
                perf_output_put(handle, data->data_src.val);
 
+       if (sample_type & PERF_SAMPLE_TRANSACTION)
+               perf_output_put(handle, data->txn);
+
        if (!event->attr.watermark) {
                int wakeup_events = event->attr.wakeup_events;
 
index b2f06f3c6a3ff32ec9f8f2b92bea0c4766ac6f7d..2a9db916c3f5fae1eb83fcea8567635a0c34758c 100644 (file)
@@ -1049,6 +1049,7 @@ static struct ctl_table kern_table[] = {
                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
                .mode           = 0644,
                .proc_handler   = perf_proc_update_handler,
+               .extra1         = &one,
        },
        {
                .procname       = "perf_cpu_time_max_percent",
index e297b74471b8a32e76912342f50f77534360ec87..f732eaa6a500b655a9b0503378bc5de99bc21294 100644 (file)
@@ -166,6 +166,9 @@ following filters are defined:
         - u:  only when the branch target is at the user level
         - k: only when the branch target is in the kernel
         - hv: only when the target is at the hypervisor level
+       - in_tx: only when the target is in a hardware transaction
+       - no_tx: only when the target is not in a hardware transaction
+       - abort_tx: only when the target is a hardware transaction abort
 
 +
 The option requires at least one branch type among any, any_call, any_ret, ind_call.
@@ -176,12 +179,14 @@ is enabled for all the sampling events. The sampled branch type is the same for
 The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
 Note that this feature may not be available on all processors.
 
--W::
 --weight::
 Enable weighted sampling. An additional weight is recorded per sample and can be
 displayed with the weight and local_weight sort keys.  This currently works for TSX
 abort events and some memory events in precise mode on modern Intel CPUs.
 
+--transaction::
+Record transaction flags for transaction related events.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
index 2b8097ee39d83c194fd8393a0a1a3e2b67e40b65..be5ad87b6c3dfb0b0f75a3eb8a49bc517dc4ac73 100644 (file)
@@ -71,7 +71,11 @@ OPTIONS
        entries are displayed as "[other]".
        - cpu: cpu number the task ran at the time of sample
        - srcline: filename and line number executed at the time of sample.  The
-       DWARF debuggin info must be provided.
+       DWARF debugging info must be provided.
+       - weight: Event specific weight, e.g. memory latency or transaction
+       abort cost. This is the global weight.
+       - local_weight: Local weight version of the weight above.
+       - transaction: Transaction abort flags.
 
        By default, comm, dso and symbol keys are used.
        (i.e. --sort comm,dso,symbol)
@@ -85,6 +89,8 @@ OPTIONS
        - symbol_from: name of function branched from
        - symbol_to: name of function branched to
        - mispredict: "N" for predicted branch, "Y" for mispredicted branch
+       - in_tx: branch in TSX transaction
+       - abort: TSX transaction abort.
 
        And default sort keys are changed to comm, dso_from, symbol_from, dso_to
        and symbol_to, see '--branch-stack'.
index 73c9759005a354eb8e052e7425f981bd16fb8c2a..80c7da6732f294782d86c51df8e8ba9225bf1d04 100644 (file)
@@ -137,6 +137,11 @@ core number and the number of online logical processors on that physical process
 After starting the program, wait msecs before measuring. This is useful to
 filter out the startup phase of the program, which is often very different.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 --------
 
index 58d6598a968679fb4c020f9d932443a57f9feee8..6d70fbfe28a2f37b6fcddd3df0e2850d8a76b2f9 100644 (file)
@@ -112,7 +112,8 @@ Default is to monitor all CPUS.
 
 -s::
 --sort::
-       Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight.
+       Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
+       local_weight, abort, in_tx, transaction
 
 -n::
 --show-nr-samples::
index 5ebd0c3b71b6aa45d80aa63b47661996577cf6a4..0393d980051642dc57b968a6342259f1782da82e 100644 (file)
@@ -63,7 +63,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
                return 0;
        }
 
-       he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1);
+       he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1, 0);
        if (he == NULL)
                return -ENOMEM;
 
index f28799e94f2a4bbbb1226da3cd78b908c7b1f610..2a78dc806c3969f58427ec0ed5445e060f05836a 100644 (file)
@@ -304,9 +304,10 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
 
 static int hists__add_entry(struct hists *self,
                            struct addr_location *al, u64 period,
-                           u64 weight)
+                           u64 weight, u64 transaction)
 {
-       if (__hists__add_entry(self, al, NULL, period, weight) != NULL)
+       if (__hists__add_entry(self, al, NULL, period, weight, transaction)
+           != NULL)
                return 0;
        return -ENOMEM;
 }
@@ -328,7 +329,8 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
        if (al.filtered)
                return 0;
 
-       if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight)) {
+       if (hists__add_entry(&evsel->hists, &al, sample->period,
+                            sample->weight, sample->transaction)) {
                pr_warning("problem incrementing symbol period, skipping event\n");
                return -1;
        }
index a41ac41546c962df3e0801b230f06b4aa9730c7a..a78db3f31b25576fc29afba5d1ce370b5914adc8 100644 (file)
@@ -618,6 +618,9 @@ static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+       BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+       BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+       BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
        BRANCH_END
 };
 
@@ -891,6 +894,8 @@ const struct option record_options[] = {
                     parse_branch_stack),
        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
                    "sample by weight (on special events only)"),
+       OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
+                   "sample transaction flags (special events only)"),
        OPT_END()
 };
 
index 72eae7498c09419c8f1f77a7a005c6dcbbb5eb4b..06e1abe351dd4cd55cea64cb6c82172e96b80db9 100644 (file)
@@ -259,7 +259,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
        }
 
        he = __hists__add_entry(&evsel->hists, al, parent, sample->period,
-                                       sample->weight);
+                               sample->weight, sample->transaction);
        if (he == NULL)
                return -ENOMEM;
 
@@ -787,7 +787,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
                   " dso_to, dso_from, symbol_to, symbol_from, mispredict,"
                   " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
-                  "snoop, locked"),
+                  "snoop, locked, abort, in_tx, transaction"),
        OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
                    "Show sample percentage for different cpu modes"),
        OPT_STRING('p', "parent", &parent_pattern, "regex",
index 5098f144b92defd53e94f9f24184e3ade0672928..700b478491015b6bcfff40f3f299316cc1d4f492 100644 (file)
@@ -46,6 +46,7 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/pmu.h"
 #include "util/event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+       "task-clock",
+       "{"
+       "instructions,"
+       "cycles,"
+       "cpu/cycles-t/,"
+       "cpu/tx-start/,"
+       "cpu/el-start/,"
+       "cpu/cycles-ct/"
+       "}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+       "task-clock",
+       "{"
+       "instructions,"
+       "cycles,"
+       "cpu/cycles-t/,"
+       "cpu/tx-start/"
+       "}"
+};
+
+/* must match transaction_attrs and the beginning of transaction_limited_attrs */
+enum {
+       T_TASK_CLOCK,
+       T_INSTRUCTIONS,
+       T_CYCLES,
+       T_CYCLES_IN_TX,
+       T_TRANSACTION_START,
+       T_ELISION_START,
+       T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist      *evsel_list;
 
 static struct perf_target      target = {
@@ -90,6 +126,7 @@ static enum aggr_mode                aggr_mode                       = AGGR_GLOBAL;
 static volatile pid_t          child_pid                       = -1;
 static bool                    null_run                        =  false;
 static int                     detailed_run                    =  0;
+static bool                    transaction_run;
 static bool                    big_num                         =  true;
 static int                     big_num_opt                     =  -1;
 static const char              *csv_sep                        = NULL;
@@ -214,7 +251,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -236,6 +276,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
        memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
        memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
        memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+       memset(runtime_cycles_in_tx_stats, 0,
+                       sizeof(runtime_cycles_in_tx_stats));
+       memset(runtime_transaction_stats, 0,
+               sizeof(runtime_transaction_stats));
+       memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 }
 
@@ -274,6 +319,29 @@ static inline int nsec_counter(struct perf_evsel *evsel)
        return 0;
 }
 
+static struct perf_evsel *nth_evsel(int n)
+{
+       static struct perf_evsel **array;
+       static int array_len;
+       struct perf_evsel *ev;
+       int j;
+
+       /* Assumes this is only called when evsel_list does not change anymore. */
+       if (!array) {
+               list_for_each_entry(ev, &evsel_list->entries, node)
+                       array_len++;
+               array = malloc(array_len * sizeof(void *));
+               if (!array)
+                       exit(ENOMEM);
+               j = 0;
+               list_for_each_entry(ev, &evsel_list->entries, node)
+                       array[j++] = ev;
+       }
+       if (n < array_len)
+               return array[n];
+       return NULL;
+}
+
 /*
  * Update various tracking values we maintain to print
  * more semantic information such as miss/hit ratios,
@@ -285,6 +353,15 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
                update_stats(&runtime_nsecs_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_stats(&runtime_cycles_stats[0], count[0]);
+       else if (transaction_run &&
+                perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
+               update_stats(&runtime_cycles_in_tx_stats[0], count[0]);
+       else if (transaction_run &&
+                perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
+               update_stats(&runtime_transaction_stats[0], count[0]);
+       else if (transaction_run &&
+                perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
+               update_stats(&runtime_elision_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -828,7 +905,7 @@ static void print_ll_cache_misses(int cpu,
 
 static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 {
-       double total, ratio = 0.0;
+       double total, ratio = 0.0, total2;
        const char *fmt;
 
        if (csv_output)
@@ -924,6 +1001,43 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
                        ratio = 1.0 * avg / total;
 
                fprintf(output, " # %8.3f GHz                    ", ratio);
+       } else if (transaction_run &&
+                  perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
+               total = avg_stats(&runtime_cycles_stats[cpu]);
+               if (total)
+                       fprintf(output,
+                               " #   %5.2f%% transactional cycles   ",
+                               100.0 * (avg / total));
+       } else if (transaction_run &&
+                  perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
+               total = avg_stats(&runtime_cycles_stats[cpu]);
+               total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+               if (total2 < avg)
+                       total2 = avg;
+               if (total)
+                       fprintf(output,
+                               " #   %5.2f%% aborted cycles         ",
+                               100.0 * ((total2-avg) / total));
+       } else if (transaction_run &&
+                  perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
+                  avg > 0 &&
+                  runtime_cycles_in_tx_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+
+               if (total)
+                       ratio = total / avg;
+
+               fprintf(output, " # %8.0f cycles / transaction   ", ratio);
+       } else if (transaction_run &&
+                  perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
+                  avg > 0 &&
+                  runtime_cycles_in_tx_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+
+               if (total)
+                       ratio = total / avg;
+
+               fprintf(output, " # %8.0f cycles / elision       ", ratio);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';
 
@@ -1237,6 +1351,16 @@ static int perf_stat_init_aggr_mode(void)
        return 0;
 }
 
+static int setup_events(const char * const *attrs, unsigned len)
+{
+       unsigned i;
+
+       for (i = 0; i < len; i++) {
+               if (parse_events(evsel_list, attrs[i]))
+                       return -1;
+       }
+       return 0;
+}
 
 /*
  * Add default attributes, if there were no attributes specified or
@@ -1355,6 +1479,22 @@ static int add_default_attributes(void)
        if (null_run)
                return 0;
 
+       if (transaction_run) {
+               int err;
+               if (pmu_have_event("cpu", "cycles-ct") &&
+                   pmu_have_event("cpu", "el-start"))
+                       err = setup_events(transaction_attrs,
+                                       ARRAY_SIZE(transaction_attrs));
+               else
+                       err = setup_events(transaction_limited_attrs,
+                                ARRAY_SIZE(transaction_limited_attrs));
+               if (err < 0) {
+                       fprintf(stderr, "Cannot set up transaction events\n");
+                       return -1;
+               }
+               return 0;
+       }
+
        if (!evsel_list->nr_entries) {
                if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
                        return -1;
@@ -1389,6 +1529,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        int output_fd = 0;
        const char *output_name = NULL;
        const struct option options[] = {
+       OPT_BOOLEAN('T', "transaction", &transaction_run,
+                   "hardware transaction statistics"),
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
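
The -T shadow metrics printed by abs_printout() above reduce to four ratios. A
standalone sketch with made-up counts, re-deriving them the way builtin-stat.c
does (the checkpointed cycles-ct counter rolls back on abort, so in-TX cycles
minus checkpointed cycles approximates aborted cycles):

    #include <stdio.h>

    int main(void)
    {
            double cycles       = 1000000.0;   /* cycles        */
            double cycles_in_tx =  400000.0;   /* cpu/cycles-t/ */
            double cycles_ct    =  300000.0;   /* cpu/cycles-ct/ */
            double tx_start     =     500.0;   /* cpu/tx-start/ */
            double el_start     =     200.0;   /* cpu/el-start/ */

            printf("%5.2f%% transactional cycles\n",
                   100.0 * (cycles_in_tx / cycles));
            /* cycles spent in transactions that did not commit */
            printf("%5.2f%% aborted cycles\n",
                   100.0 * ((cycles_in_tx - cycles_ct) / cycles));
            printf("%8.0f cycles / transaction\n", cycles_in_tx / tx_start);
            printf("%8.0f cycles / elision\n", cycles_in_tx / el_start);
            return 0;
    }
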
index 212214162bb2820286694417035e470e1d465e76..b3e0229ee38f181a9430a31bcc423803d90b1e57 100644 (file)
@@ -247,9 +247,8 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
        pthread_mutex_lock(&evsel->hists.lock);
        he = __hists__add_entry(&evsel->hists, al, NULL, sample->period,
-                               sample->weight);
+                               sample->weight, sample->transaction);
        pthread_mutex_unlock(&evsel->hists.lock);
-
        if (he == NULL)
                return NULL;
 
@@ -1103,7 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-                  "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"),
+                  "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight,"
+                  " abort, in_tx, transaction"),
        OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
                    "Show a column with the number of samples"),
        OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
index cf20187eee0a7345373dfc053c580bc1e77c4bb8..84502e88488b7403d705ece609518bad666f71e3 100644 (file)
@@ -182,7 +182,9 @@ struct ip_callchain {
 struct branch_flags {
        u64 mispred:1;
        u64 predicted:1;
-       u64 reserved:62;
+       u64 in_tx:1;
+       u64 abort:1;
+       u64 reserved:60;
 };
 
 struct branch_entry {
@@ -231,6 +233,7 @@ struct perf_record_opts {
        u64          default_interval;
        u64          user_interval;
        u16          stack_dump_size;
+       bool         sample_transaction;
 };
 
 #endif
index 4228ffc0d9681d7acda97ecacccf8bea9b76f1e4..025503a22ff722386bbd7e9a13fe8758dee66969 100644 (file)
@@ -222,7 +222,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
                                                          &sample) < 0)
                                goto out;
 
-                       he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
+                       he = __hists__add_entry(&evsel->hists, &al, NULL,
+                                               1, 1, 0);
                        if (he == NULL)
                                goto out;
 
@@ -244,7 +245,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
                                                          &sample) < 0)
                                goto out;
 
-                       he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
+                       he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1,
+                                               0);
                        if (he == NULL)
                                goto out;
 
index c67ecc457d295d029a2307c1b0ad7e1531b944d1..17d9e167a7b99af726d08ab019100362935cb747 100644 (file)
@@ -111,6 +111,7 @@ struct perf_sample {
        u64 stream_id;
        u64 period;
        u64 weight;
+       u64 transaction;
        u32 cpu;
        u32 raw_size;
        u64 data_src;
index 0ce9febf1ba0c8c1a691c74a0c55fa1c4a8dfd14..abe69af58b6288787b1b8f10c44ead3edc70b90d 100644 (file)
@@ -681,6 +681,9 @@ void perf_evsel__config(struct perf_evsel *evsel,
        attr->mmap2 = track && !perf_missing_features.mmap2;
        attr->comm  = track;
 
+       if (opts->sample_transaction)
+               attr->sample_type       |= PERF_SAMPLE_TRANSACTION;
+
        /*
         * XXX see the function comment above
         *
@@ -1470,6 +1473,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
                array++;
        }
 
+       data->transaction = 0;
+       if (type & PERF_SAMPLE_TRANSACTION) {
+               data->transaction = *array;
+               array++;
+       }
+
        return 0;
 }
 
index 4a7bdc713bab2fa4266069388ed79d530ba64b94..5aa68cddc7d9fff14bf6fdff9be38dcf2655a483 100644 (file)
@@ -197,6 +197,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1,
               (e1->attr.config == e2->attr.config);
 }
 
+#define perf_evsel__cmp(a, b)                  \
+       ((a) &&                                 \
+        (b) &&                                 \
+        (a)->attr.type == (b)->attr.type &&    \
+        (a)->attr.config == (b)->attr.config)
+
 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
                              int cpu, int thread, bool scale);
 
index 9ff6cf3e9a99f69596b37372f60203c11bec88a3..f3278a388e9a916c5e3c95f940f0fcc5be20b82d 100644 (file)
@@ -160,6 +160,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
        hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
        hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
        hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+
+       if (h->transaction)
+               hists__new_col_len(hists, HISTC_TRANSACTION,
+                                  hist_entry__transaction_len());
 }
 
 void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -346,7 +350,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct hist_entry *he;
-       int cmp;
+       int64_t cmp;
 
        p = &hists->entries_in->rb_node;
 
@@ -466,7 +470,7 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
 struct hist_entry *__hists__add_entry(struct hists *self,
                                      struct addr_location *al,
                                      struct symbol *sym_parent, u64 period,
-                                     u64 weight)
+                                     u64 weight, u64 transaction)
 {
        struct hist_entry entry = {
                .thread = al->thread,
@@ -487,6 +491,7 @@ struct hist_entry *__hists__add_entry(struct hists *self,
                .hists  = self,
                .branch_info = NULL,
                .mem_info = NULL,
+               .transaction = transaction,
        };
 
        return add_hist_entry(self, &entry, al, period, weight);
@@ -884,7 +889,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct hist_entry *he;
-       int cmp;
+       int64_t cmp;
 
        if (sort__need_collapse)
                root = &hists->entries_collapsed;
index 1329b6b6ffe61b0f5fb97d8582a837f554b3c752..6a048c09cd648ba966d4df185b0b194291df36e9 100644 (file)
@@ -45,6 +45,8 @@ enum hist_column {
        HISTC_CPU,
        HISTC_SRCLINE,
        HISTC_MISPREDICT,
+       HISTC_IN_TX,
+       HISTC_ABORT,
        HISTC_SYMBOL_FROM,
        HISTC_SYMBOL_TO,
        HISTC_DSO_FROM,
@@ -57,6 +59,7 @@ enum hist_column {
        HISTC_MEM_TLB,
        HISTC_MEM_LVL,
        HISTC_MEM_SNOOP,
+       HISTC_TRANSACTION,
        HISTC_NR_COLS, /* Last entry */
 };
 
@@ -82,9 +85,10 @@ struct hists {
 struct hist_entry *__hists__add_entry(struct hists *self,
                                      struct addr_location *al,
                                      struct symbol *parent, u64 period,
-                                     u64 weight);
+                                     u64 weight, u64 transaction);
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+int hist_entry__transaction_len(void);
 int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size,
                              struct hists *hists);
 void hist_entry__free(struct hist_entry *);
index bc9d8069d37637835758d02e13ec3bc751d0f5e3..64362fe45b71f562d73c40594bc050c90ec01c3d 100644 (file)
@@ -637,3 +637,19 @@ void print_pmu_events(const char *event_glob, bool name_only)
                printf("\n");
        free(aliases);
 }
+
+bool pmu_have_event(const char *pname, const char *name)
+{
+       struct perf_pmu *pmu;
+       struct perf_pmu_alias *alias;
+
+       pmu = NULL;
+       while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+               if (strcmp(pname, pmu->name))
+                       continue;
+               list_for_each_entry(alias, &pmu->aliases, list)
+                       if (!strcmp(alias->name, name))
+                               return true;
+       }
+       return false;
+}
index 6b2cbe2d4cc3a6d59a3d692770793c19ea984930..1179b26f244a31280e5454787a51a8f01ff9ac2e 100644 (file)
@@ -42,6 +42,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
 struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
 
 void print_pmu_events(const char *event_glob, bool name_only);
+bool pmu_have_event(const char *pname, const char *name);
 
 int perf_pmu__test(void);
 #endif /* __PMU_H */
index 568b750c01f60b3d81cda29966ed40534a7bc8e1..b97f468af955df09e888bf12c2af02551beab08d 100644 (file)
@@ -860,6 +860,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
        if (sample_type & PERF_SAMPLE_DATA_SRC)
                printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
 
+       if (sample_type & PERF_SAMPLE_TRANSACTION)
+               printf("... transaction: %" PRIx64 "\n", sample->transaction);
+
        if (sample_type & PERF_SAMPLE_READ)
                sample_read__printf(sample, evsel->attr.read_format);
 }
index 5f118a089519a46bb6b370536660476bc6880eda..b4ecc0e4c908da0fe698849198b50963e6ef558a 100644 (file)
@@ -858,6 +858,127 @@ struct sort_entry sort_mem_snoop = {
        .se_width_idx   = HISTC_MEM_SNOOP,
 };
 
+static int64_t
+sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return left->branch_info->flags.abort !=
+               right->branch_info->flags.abort;
+}
+
+static int hist_entry__abort_snprintf(struct hist_entry *self, char *bf,
+                                   size_t size, unsigned int width)
+{
+       static const char *out = ".";
+
+       if (self->branch_info->flags.abort)
+               out = "A";
+       return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_abort = {
+       .se_header      = "Transaction abort",
+       .se_cmp         = sort__abort_cmp,
+       .se_snprintf    = hist_entry__abort_snprintf,
+       .se_width_idx   = HISTC_ABORT,
+};
+
+static int64_t
+sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return left->branch_info->flags.in_tx !=
+               right->branch_info->flags.in_tx;
+}
+
+static int hist_entry__in_tx_snprintf(struct hist_entry *self, char *bf,
+                                   size_t size, unsigned int width)
+{
+       static const char *out = ".";
+
+       if (self->branch_info->flags.in_tx)
+               out = "T";
+
+       return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_in_tx = {
+       .se_header      = "Branch in transaction",
+       .se_cmp         = sort__in_tx_cmp,
+       .se_snprintf    = hist_entry__in_tx_snprintf,
+       .se_width_idx   = HISTC_IN_TX,
+};
+
+static int64_t
+sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return left->transaction - right->transaction;
+}
+
+static inline char *add_str(char *p, const char *str)
+{
+       strcpy(p, str);
+       return p + strlen(str);
+}
+
+static struct txbit {
+       unsigned flag;
+       const char *name;
+       int skip_for_len;
+} txbits[] = {
+       { PERF_TXN_ELISION,        "EL ",        0 },
+       { PERF_TXN_TRANSACTION,    "TX ",        1 },
+       { PERF_TXN_SYNC,           "SYNC ",      1 },
+       { PERF_TXN_ASYNC,          "ASYNC ",     0 },
+       { PERF_TXN_RETRY,          "RETRY ",     0 },
+       { PERF_TXN_CONFLICT,       "CON ",       0 },
+       { PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 },
+       { PERF_TXN_CAPACITY_READ,  "CAP-READ ",  0 },
+       { 0, NULL, 0 }
+};
+
+int hist_entry__transaction_len(void)
+{
+       int i;
+       int len = 0;
+
+       for (i = 0; txbits[i].name; i++) {
+               if (!txbits[i].skip_for_len)
+                       len += strlen(txbits[i].name);
+       }
+       len += 4; /* :XX<space> */
+       return len;
+}
+
+static int hist_entry__transaction_snprintf(struct hist_entry *self, char *bf,
+                                           size_t size, unsigned int width)
+{
+       u64 t = self->transaction;
+       char buf[128];
+       char *p = buf;
+       int i;
+
+       buf[0] = 0;
+       for (i = 0; txbits[i].name; i++)
+               if (txbits[i].flag & t)
+                       p = add_str(p, txbits[i].name);
+       if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC)))
+               p = add_str(p, "NEITHER ");
+       if (t & PERF_TXN_ABORT_MASK) {
+               sprintf(p, ":%" PRIx64,
+                       (t & PERF_TXN_ABORT_MASK) >>
+                       PERF_TXN_ABORT_SHIFT);
+               p += strlen(p);
+       }
+
+       return repsep_snprintf(bf, size, "%-*s", width, buf);
+}
+
+struct sort_entry sort_transaction = {
+       .se_header      = "Transaction                ",
+       .se_cmp         = sort__transaction_cmp,
+       .se_snprintf    = hist_entry__transaction_snprintf,
+       .se_width_idx   = HISTC_TRANSACTION,
+};
+
 struct sort_dimension {
        const char              *name;
        struct sort_entry       *entry;
@@ -876,6 +997,7 @@ static struct sort_dimension common_sort_dimensions[] = {
        DIM(SORT_SRCLINE, "srcline", sort_srcline),
        DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
        DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
+       DIM(SORT_TRANSACTION, "transaction", sort_transaction),
 };
 
 #undef DIM
@@ -888,6 +1010,8 @@ static struct sort_dimension bstack_sort_dimensions[] = {
        DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
        DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
        DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+       DIM(SORT_IN_TX, "in_tx", sort_in_tx),
+       DIM(SORT_ABORT, "abort", sort_abort),
 };
 
 #undef DIM
index 4e80dbd271e77e245d41f2f36afdffdf8ae7ab4e..bf4333694d3ab108acd7ca0e873f26cf3cff1ca1 100644 (file)
@@ -85,6 +85,7 @@ struct hist_entry {
        struct map_symbol       ms;
        struct thread           *thread;
        u64                     ip;
+       u64                     transaction;
        s32                     cpu;
 
        struct hist_entry_diff  diff;
@@ -145,6 +146,7 @@ enum sort_type {
        SORT_SRCLINE,
        SORT_LOCAL_WEIGHT,
        SORT_GLOBAL_WEIGHT,
+       SORT_TRANSACTION,
 
        /* branch stack specific sort keys */
        __SORT_BRANCH_STACK,
@@ -153,6 +155,8 @@ enum sort_type {
        SORT_SYM_FROM,
        SORT_SYM_TO,
        SORT_MISPREDICT,
+       SORT_ABORT,
+       SORT_IN_TX,
 
        /* memory mode specific sort keys */
        __SORT_MEMORY_MODE,