perf_events, x86: Implement Intel Westmere/Nehalem-EX support
[karo-tx-linux.git] / arch / x86 / kernel / cpu / perf_event.c
1 /*
2  * Performance events x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstra@redhat.com>
9  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10  *
11  *  For licencing details see kernel-base/COPYING
12  */
13
14 #include <linux/perf_event.h>
15 #include <linux/capability.h>
16 #include <linux/notifier.h>
17 #include <linux/hardirq.h>
18 #include <linux/kprobes.h>
19 #include <linux/module.h>
20 #include <linux/kdebug.h>
21 #include <linux/sched.h>
22 #include <linux/uaccess.h>
23 #include <linux/highmem.h>
24 #include <linux/cpu.h>
25
26 #include <asm/apic.h>
27 #include <asm/stacktrace.h>
28 #include <asm/nmi.h>
29
30 static u64 perf_event_mask __read_mostly;
31
32 /* The maximal number of PEBS events: */
33 #define MAX_PEBS_EVENTS 4
34
35 /* The size of a BTS record in bytes: */
36 #define BTS_RECORD_SIZE         24
37
38 /* The size of a per-cpu BTS buffer in bytes: */
39 #define BTS_BUFFER_SIZE         (BTS_RECORD_SIZE * 2048)
40
41 /* The BTS overflow threshold in bytes from the end of the buffer: */
42 #define BTS_OVFL_TH             (BTS_RECORD_SIZE * 128)
43
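/*
 * Worked example (illustrative arithmetic based on the constants above, not
 * part of the original source): each per-cpu BTS buffer holds 2048 records,
 * i.e. BTS_BUFFER_SIZE = 24 * 2048 = 49152 bytes, and the interrupt
 * threshold sits BTS_OVFL_TH = 24 * 128 = 3072 bytes before the end of the
 * buffer, so the overflow interrupt fires while 128 record slots are still
 * free.
 */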
44
45 /*
46  * Bits in the debugctlmsr controlling branch tracing.
47  */
48 #define X86_DEBUGCTL_TR                 (1 << 6)
49 #define X86_DEBUGCTL_BTS                (1 << 7)
50 #define X86_DEBUGCTL_BTINT              (1 << 8)
51 #define X86_DEBUGCTL_BTS_OFF_OS         (1 << 9)
52 #define X86_DEBUGCTL_BTS_OFF_USR        (1 << 10)
53
54 /*
55  * A debug store configuration.
56  *
57  * We only support architectures that use 64bit fields.
58  */
59 struct debug_store {
60         u64     bts_buffer_base;
61         u64     bts_index;
62         u64     bts_absolute_maximum;
63         u64     bts_interrupt_threshold;
64         u64     pebs_buffer_base;
65         u64     pebs_index;
66         u64     pebs_absolute_maximum;
67         u64     pebs_interrupt_threshold;
68         u64     pebs_event_reset[MAX_PEBS_EVENTS];
69 };
70
71 struct cpu_hw_events {
72         struct perf_event       *events[X86_PMC_IDX_MAX];
73         unsigned long           used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
74         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75         unsigned long           interrupts;
76         int                     enabled;
77         struct debug_store      *ds;
78 };
79
80 struct event_constraint {
81         unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
82         int             code;
83 };
84
85 #define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
86 #define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
87
88 #define for_each_event_constraint(e, c) \
89         for ((e) = (c); (e)->idxmsk[0]; (e)++)
90
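/*
 * Worked example (illustrative, derived from the definitions above): an
 * entry such as EVENT_CONSTRAINT(0x12, 0x2) declares that event code 0x12
 * (MUL) may only be scheduled on the counters whose index bits are set in
 * idxmsk[0], here generic counter 1 only.  The Intel allocator further
 * below walks a table roughly like:
 *
 *	for_each_event_constraint(c, event_constraints) {
 *		if (code == c->code) {
 *			// try only the counter indices allowed by c->idxmsk
 *		}
 *	}
 *
 * EVENT_CONSTRAINT_END terminates the table through its zero idxmsk[0].
 */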
91
92 /*
93  * struct x86_pmu - generic x86 pmu
94  */
95 struct x86_pmu {
96         const char      *name;
97         int             version;
98         int             (*handle_irq)(struct pt_regs *);
99         void            (*disable_all)(void);
100         void            (*enable_all)(void);
101         void            (*enable)(struct hw_perf_event *, int);
102         void            (*disable)(struct hw_perf_event *, int);
103         unsigned        eventsel;
104         unsigned        perfctr;
105         u64             (*event_map)(int);
106         u64             (*raw_event)(u64);
107         int             max_events;
108         int             num_events;
109         int             num_events_fixed;
110         int             event_bits;
111         u64             event_mask;
112         int             apic;
113         u64             max_period;
114         u64             intel_ctrl;
115         void            (*enable_bts)(u64 config);
116         void            (*disable_bts)(void);
117         int             (*get_event_idx)(struct cpu_hw_events *cpuc,
118                                          struct hw_perf_event *hwc);
119 };
120
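/*
 * Sketch (illustrative only; the real initializers appear later in this
 * file, and MSR_K7_PERFCTR0 is assumed here as the counter base): a vendor
 * backend fills struct x86_pmu with its callbacks and MSR bases, roughly
 * like this AMD-flavoured example built from the helpers defined below,
 * omitting handle_irq and the remaining fields:
 *
 *	static struct x86_pmu amd_pmu_sketch = {
 *		.name		= "AMD64",
 *		.disable_all	= amd_pmu_disable_all,
 *		.enable_all	= amd_pmu_enable_all,
 *		.enable		= amd_pmu_enable_event,
 *		.disable	= amd_pmu_disable_event,
 *		.eventsel	= MSR_K7_EVNTSEL0,
 *		.perfctr	= MSR_K7_PERFCTR0,
 *		.event_map	= amd_pmu_event_map,
 *		.raw_event	= amd_pmu_raw_event,
 *		.max_events	= ARRAY_SIZE(amd_perfmon_event_map),
 *	};
 *
 * hw_perf_enable()/hw_perf_disable() and the scheduling code then operate
 * purely through these hooks.
 */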
121 static struct x86_pmu x86_pmu __read_mostly;
122
123 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
124         .enabled = 1,
125 };
126
127 static const struct event_constraint *event_constraints;
128
129 /*
130  * Not sure about some of these
131  */
132 static const u64 p6_perfmon_event_map[] =
133 {
134   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0079,
135   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
136   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0f2e,
137   [PERF_COUNT_HW_CACHE_MISSES]          = 0x012e,
138   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
139   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
140   [PERF_COUNT_HW_BUS_CYCLES]            = 0x0062,
141 };
142
143 static u64 p6_pmu_event_map(int hw_event)
144 {
145         return p6_perfmon_event_map[hw_event];
146 }
147
148 /*
149  * Event setting that is specified not to count anything.
150  * We use this to effectively disable a counter.
151  *
152  * L2_RQSTS with 0 MESI unit mask.
153  */
154 #define P6_NOP_EVENT                    0x0000002EULL
155
156 static u64 p6_pmu_raw_event(u64 hw_event)
157 {
158 #define P6_EVNTSEL_EVENT_MASK           0x000000FFULL
159 #define P6_EVNTSEL_UNIT_MASK            0x0000FF00ULL
160 #define P6_EVNTSEL_EDGE_MASK            0x00040000ULL
161 #define P6_EVNTSEL_INV_MASK             0x00800000ULL
162 #define P6_EVNTSEL_REG_MASK             0xFF000000ULL
163
164 #define P6_EVNTSEL_MASK                 \
165         (P6_EVNTSEL_EVENT_MASK |        \
166          P6_EVNTSEL_UNIT_MASK  |        \
167          P6_EVNTSEL_EDGE_MASK  |        \
168          P6_EVNTSEL_INV_MASK   |        \
169          P6_EVNTSEL_REG_MASK)
170
171         return hw_event & P6_EVNTSEL_MASK;
172 }
173
174 static const struct event_constraint intel_p6_event_constraints[] =
175 {
176         EVENT_CONSTRAINT(0xc1, 0x1),    /* FLOPS */
177         EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
178         EVENT_CONSTRAINT(0x11, 0x1),    /* FP_ASSIST */
179         EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
180         EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
181         EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
182         EVENT_CONSTRAINT_END
183 };
184
185 /*
186  * Intel PerfMon v3. Used on Core2 and later.
187  */
188 static const u64 intel_perfmon_event_map[] =
189 {
190   [PERF_COUNT_HW_CPU_CYCLES]            = 0x003c,
191   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
192   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x4f2e,
193   [PERF_COUNT_HW_CACHE_MISSES]          = 0x412e,
194   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
195   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
196   [PERF_COUNT_HW_BUS_CYCLES]            = 0x013c,
197 };
198
199 static const struct event_constraint intel_core_event_constraints[] =
200 {
201         EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
202         EVENT_CONSTRAINT(0x11, 0x2),    /* FP_ASSIST */
203         EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
204         EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
205         EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
206         EVENT_CONSTRAINT(0x18, 0x1),    /* IDLE_DURING_DIV */
207         EVENT_CONSTRAINT(0x19, 0x2),    /* DELAYED_BYPASS */
208         EVENT_CONSTRAINT(0xa1, 0x1),    /* RS_UOPS_DISPATCH_CYCLES */
209         EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED */
210         EVENT_CONSTRAINT_END
211 };
212
213 static const struct event_constraint intel_nehalem_event_constraints[] =
214 {
215         EVENT_CONSTRAINT(0x40, 0x3),    /* L1D_CACHE_LD */
216         EVENT_CONSTRAINT(0x41, 0x3),    /* L1D_CACHE_ST */
217         EVENT_CONSTRAINT(0x42, 0x3),    /* L1D_CACHE_LOCK */
218         EVENT_CONSTRAINT(0x43, 0x3),    /* L1D_ALL_REF */
219         EVENT_CONSTRAINT(0x4e, 0x3),    /* L1D_PREFETCH */
220         EVENT_CONSTRAINT(0x4c, 0x3),    /* LOAD_HIT_PRE */
221         EVENT_CONSTRAINT(0x51, 0x3),    /* L1D */
222         EVENT_CONSTRAINT(0x52, 0x3),    /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
223         EVENT_CONSTRAINT(0x53, 0x3),    /* L1D_CACHE_LOCK_FB_HIT */
224         EVENT_CONSTRAINT(0xc5, 0x3),    /* CACHE_LOCK_CYCLES */
225         EVENT_CONSTRAINT_END
226 };
227
228 static u64 intel_pmu_event_map(int hw_event)
229 {
230         return intel_perfmon_event_map[hw_event];
231 }
232
233 /*
234  * Generalized hw caching related hw_event table, filled
235  * in on a per model basis. A value of 0 means
236  * 'not supported', -1 means 'hw_event makes no sense on
237  * this CPU', any other value means the raw hw_event
238  * ID.
239  */
240
241 #define C(x) PERF_COUNT_HW_CACHE_##x
242
243 static u64 __read_mostly hw_cache_event_ids
244                                 [PERF_COUNT_HW_CACHE_MAX]
245                                 [PERF_COUNT_HW_CACHE_OP_MAX]
246                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
247
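/*
 * Worked example (illustrative): for PERF_TYPE_HW_CACHE events the
 * attr->config value is decoded by set_ext_hw_attr() below as
 *
 *	cache_type   = (config >>  0) & 0xff;
 *	cache_op     = (config >>  8) & 0xff;
 *	cache_result = (config >> 16) & 0xff;
 *
 * so a config of C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)
 * indexes hw_cache_event_ids[L1D][OP_READ][RESULT_MISS], which the
 * Westmere table below, for instance, maps to the raw event 0x0151
 * (L1D.REPL).
 */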
248 static __initconst u64 westmere_hw_cache_event_ids
249                                 [PERF_COUNT_HW_CACHE_MAX]
250                                 [PERF_COUNT_HW_CACHE_OP_MAX]
251                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
252 {
253  [ C(L1D) ] = {
254         [ C(OP_READ) ] = {
255                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
256                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
257         },
258         [ C(OP_WRITE) ] = {
259                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
260                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
261         },
262         [ C(OP_PREFETCH) ] = {
263                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
264                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
265         },
266  },
267  [ C(L1I ) ] = {
268         [ C(OP_READ) ] = {
269                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
270                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
271         },
272         [ C(OP_WRITE) ] = {
273                 [ C(RESULT_ACCESS) ] = -1,
274                 [ C(RESULT_MISS)   ] = -1,
275         },
276         [ C(OP_PREFETCH) ] = {
277                 [ C(RESULT_ACCESS) ] = 0x0,
278                 [ C(RESULT_MISS)   ] = 0x0,
279         },
280  },
281  [ C(LL  ) ] = {
282         [ C(OP_READ) ] = {
283                 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
284                 [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
285         },
286         [ C(OP_WRITE) ] = {
287                 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
288                 [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
289         },
290         [ C(OP_PREFETCH) ] = {
291                 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
292                 [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
293         },
294  },
295  [ C(DTLB) ] = {
296         [ C(OP_READ) ] = {
297                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
298                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
299         },
300         [ C(OP_WRITE) ] = {
301                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
302                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
303         },
304         [ C(OP_PREFETCH) ] = {
305                 [ C(RESULT_ACCESS) ] = 0x0,
306                 [ C(RESULT_MISS)   ] = 0x0,
307         },
308  },
309  [ C(ITLB) ] = {
310         [ C(OP_READ) ] = {
311                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
312                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
313         },
314         [ C(OP_WRITE) ] = {
315                 [ C(RESULT_ACCESS) ] = -1,
316                 [ C(RESULT_MISS)   ] = -1,
317         },
318         [ C(OP_PREFETCH) ] = {
319                 [ C(RESULT_ACCESS) ] = -1,
320                 [ C(RESULT_MISS)   ] = -1,
321         },
322  },
323  [ C(BPU ) ] = {
324         [ C(OP_READ) ] = {
325                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
326                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
327         },
328         [ C(OP_WRITE) ] = {
329                 [ C(RESULT_ACCESS) ] = -1,
330                 [ C(RESULT_MISS)   ] = -1,
331         },
332         [ C(OP_PREFETCH) ] = {
333                 [ C(RESULT_ACCESS) ] = -1,
334                 [ C(RESULT_MISS)   ] = -1,
335         },
336  },
337 };
338
339 static __initconst u64 nehalem_hw_cache_event_ids
340                                 [PERF_COUNT_HW_CACHE_MAX]
341                                 [PERF_COUNT_HW_CACHE_OP_MAX]
342                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
343 {
344  [ C(L1D) ] = {
345         [ C(OP_READ) ] = {
346                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
347                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
348         },
349         [ C(OP_WRITE) ] = {
350                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
351                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
352         },
353         [ C(OP_PREFETCH) ] = {
354                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
355                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
356         },
357  },
358  [ C(L1I ) ] = {
359         [ C(OP_READ) ] = {
360                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
361                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
362         },
363         [ C(OP_WRITE) ] = {
364                 [ C(RESULT_ACCESS) ] = -1,
365                 [ C(RESULT_MISS)   ] = -1,
366         },
367         [ C(OP_PREFETCH) ] = {
368                 [ C(RESULT_ACCESS) ] = 0x0,
369                 [ C(RESULT_MISS)   ] = 0x0,
370         },
371  },
372  [ C(LL  ) ] = {
373         [ C(OP_READ) ] = {
374                 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
375                 [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
376         },
377         [ C(OP_WRITE) ] = {
378                 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
379                 [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
380         },
381         [ C(OP_PREFETCH) ] = {
382                 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
383                 [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
384         },
385  },
386  [ C(DTLB) ] = {
387         [ C(OP_READ) ] = {
388                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
389                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
390         },
391         [ C(OP_WRITE) ] = {
392                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
393                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
394         },
395         [ C(OP_PREFETCH) ] = {
396                 [ C(RESULT_ACCESS) ] = 0x0,
397                 [ C(RESULT_MISS)   ] = 0x0,
398         },
399  },
400  [ C(ITLB) ] = {
401         [ C(OP_READ) ] = {
402                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
403                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
404         },
405         [ C(OP_WRITE) ] = {
406                 [ C(RESULT_ACCESS) ] = -1,
407                 [ C(RESULT_MISS)   ] = -1,
408         },
409         [ C(OP_PREFETCH) ] = {
410                 [ C(RESULT_ACCESS) ] = -1,
411                 [ C(RESULT_MISS)   ] = -1,
412         },
413  },
414  [ C(BPU ) ] = {
415         [ C(OP_READ) ] = {
416                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
417                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
418         },
419         [ C(OP_WRITE) ] = {
420                 [ C(RESULT_ACCESS) ] = -1,
421                 [ C(RESULT_MISS)   ] = -1,
422         },
423         [ C(OP_PREFETCH) ] = {
424                 [ C(RESULT_ACCESS) ] = -1,
425                 [ C(RESULT_MISS)   ] = -1,
426         },
427  },
428 };
429
430 static __initconst u64 core2_hw_cache_event_ids
431                                 [PERF_COUNT_HW_CACHE_MAX]
432                                 [PERF_COUNT_HW_CACHE_OP_MAX]
433                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
434 {
435  [ C(L1D) ] = {
436         [ C(OP_READ) ] = {
437                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
438                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
439         },
440         [ C(OP_WRITE) ] = {
441                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
442                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
443         },
444         [ C(OP_PREFETCH) ] = {
445                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
446                 [ C(RESULT_MISS)   ] = 0,
447         },
448  },
449  [ C(L1I ) ] = {
450         [ C(OP_READ) ] = {
451                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
452                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
453         },
454         [ C(OP_WRITE) ] = {
455                 [ C(RESULT_ACCESS) ] = -1,
456                 [ C(RESULT_MISS)   ] = -1,
457         },
458         [ C(OP_PREFETCH) ] = {
459                 [ C(RESULT_ACCESS) ] = 0,
460                 [ C(RESULT_MISS)   ] = 0,
461         },
462  },
463  [ C(LL  ) ] = {
464         [ C(OP_READ) ] = {
465                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
466                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
467         },
468         [ C(OP_WRITE) ] = {
469                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
470                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
471         },
472         [ C(OP_PREFETCH) ] = {
473                 [ C(RESULT_ACCESS) ] = 0,
474                 [ C(RESULT_MISS)   ] = 0,
475         },
476  },
477  [ C(DTLB) ] = {
478         [ C(OP_READ) ] = {
479                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
480                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
481         },
482         [ C(OP_WRITE) ] = {
483                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
484                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
485         },
486         [ C(OP_PREFETCH) ] = {
487                 [ C(RESULT_ACCESS) ] = 0,
488                 [ C(RESULT_MISS)   ] = 0,
489         },
490  },
491  [ C(ITLB) ] = {
492         [ C(OP_READ) ] = {
493                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
494                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
495         },
496         [ C(OP_WRITE) ] = {
497                 [ C(RESULT_ACCESS) ] = -1,
498                 [ C(RESULT_MISS)   ] = -1,
499         },
500         [ C(OP_PREFETCH) ] = {
501                 [ C(RESULT_ACCESS) ] = -1,
502                 [ C(RESULT_MISS)   ] = -1,
503         },
504  },
505  [ C(BPU ) ] = {
506         [ C(OP_READ) ] = {
507                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
508                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
509         },
510         [ C(OP_WRITE) ] = {
511                 [ C(RESULT_ACCESS) ] = -1,
512                 [ C(RESULT_MISS)   ] = -1,
513         },
514         [ C(OP_PREFETCH) ] = {
515                 [ C(RESULT_ACCESS) ] = -1,
516                 [ C(RESULT_MISS)   ] = -1,
517         },
518  },
519 };
520
521 static __initconst u64 atom_hw_cache_event_ids
522                                 [PERF_COUNT_HW_CACHE_MAX]
523                                 [PERF_COUNT_HW_CACHE_OP_MAX]
524                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
525 {
526  [ C(L1D) ] = {
527         [ C(OP_READ) ] = {
528                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
529                 [ C(RESULT_MISS)   ] = 0,
530         },
531         [ C(OP_WRITE) ] = {
532                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
533                 [ C(RESULT_MISS)   ] = 0,
534         },
535         [ C(OP_PREFETCH) ] = {
536                 [ C(RESULT_ACCESS) ] = 0x0,
537                 [ C(RESULT_MISS)   ] = 0,
538         },
539  },
540  [ C(L1I ) ] = {
541         [ C(OP_READ) ] = {
542                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
543                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
544         },
545         [ C(OP_WRITE) ] = {
546                 [ C(RESULT_ACCESS) ] = -1,
547                 [ C(RESULT_MISS)   ] = -1,
548         },
549         [ C(OP_PREFETCH) ] = {
550                 [ C(RESULT_ACCESS) ] = 0,
551                 [ C(RESULT_MISS)   ] = 0,
552         },
553  },
554  [ C(LL  ) ] = {
555         [ C(OP_READ) ] = {
556                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
557                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
558         },
559         [ C(OP_WRITE) ] = {
560                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
561                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
562         },
563         [ C(OP_PREFETCH) ] = {
564                 [ C(RESULT_ACCESS) ] = 0,
565                 [ C(RESULT_MISS)   ] = 0,
566         },
567  },
568  [ C(DTLB) ] = {
569         [ C(OP_READ) ] = {
570                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
571                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
572         },
573         [ C(OP_WRITE) ] = {
574                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
575                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
576         },
577         [ C(OP_PREFETCH) ] = {
578                 [ C(RESULT_ACCESS) ] = 0,
579                 [ C(RESULT_MISS)   ] = 0,
580         },
581  },
582  [ C(ITLB) ] = {
583         [ C(OP_READ) ] = {
584                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
585                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
586         },
587         [ C(OP_WRITE) ] = {
588                 [ C(RESULT_ACCESS) ] = -1,
589                 [ C(RESULT_MISS)   ] = -1,
590         },
591         [ C(OP_PREFETCH) ] = {
592                 [ C(RESULT_ACCESS) ] = -1,
593                 [ C(RESULT_MISS)   ] = -1,
594         },
595  },
596  [ C(BPU ) ] = {
597         [ C(OP_READ) ] = {
598                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
599                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
600         },
601         [ C(OP_WRITE) ] = {
602                 [ C(RESULT_ACCESS) ] = -1,
603                 [ C(RESULT_MISS)   ] = -1,
604         },
605         [ C(OP_PREFETCH) ] = {
606                 [ C(RESULT_ACCESS) ] = -1,
607                 [ C(RESULT_MISS)   ] = -1,
608         },
609  },
610 };
611
612 static u64 intel_pmu_raw_event(u64 hw_event)
613 {
614 #define CORE_EVNTSEL_EVENT_MASK         0x000000FFULL
615 #define CORE_EVNTSEL_UNIT_MASK          0x0000FF00ULL
616 #define CORE_EVNTSEL_EDGE_MASK          0x00040000ULL
617 #define CORE_EVNTSEL_INV_MASK           0x00800000ULL
618 #define CORE_EVNTSEL_REG_MASK           0xFF000000ULL
619
620 #define CORE_EVNTSEL_MASK               \
621         (CORE_EVNTSEL_EVENT_MASK |      \
622          CORE_EVNTSEL_UNIT_MASK  |      \
623          CORE_EVNTSEL_EDGE_MASK  |      \
624          CORE_EVNTSEL_INV_MASK  |       \
625          CORE_EVNTSEL_REG_MASK)
626
627         return hw_event & CORE_EVNTSEL_MASK;
628 }
629
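/*
 * Worked example (illustrative): a raw Intel hw_event keeps the event
 * select in bits 0-7 and the unit mask in bits 8-15 of the event-select
 * register, plus the edge/inv/counter-mask filter bits retained by
 * CORE_EVNTSEL_MASK above.  E.g. the PERF_COUNT_HW_CACHE_MISSES mapping
 * 0x412e in intel_perfmon_event_map is event 0x2e with unit mask 0x41
 * (LLC misses).
 */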
630 static __initconst u64 amd_hw_cache_event_ids
631                                 [PERF_COUNT_HW_CACHE_MAX]
632                                 [PERF_COUNT_HW_CACHE_OP_MAX]
633                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
634 {
635  [ C(L1D) ] = {
636         [ C(OP_READ) ] = {
637                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
638                 [ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
639         },
640         [ C(OP_WRITE) ] = {
641                 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
642                 [ C(RESULT_MISS)   ] = 0,
643         },
644         [ C(OP_PREFETCH) ] = {
645                 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
646                 [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
647         },
648  },
649  [ C(L1I ) ] = {
650         [ C(OP_READ) ] = {
651                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
652                 [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
653         },
654         [ C(OP_WRITE) ] = {
655                 [ C(RESULT_ACCESS) ] = -1,
656                 [ C(RESULT_MISS)   ] = -1,
657         },
658         [ C(OP_PREFETCH) ] = {
659                 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
660                 [ C(RESULT_MISS)   ] = 0,
661         },
662  },
663  [ C(LL  ) ] = {
664         [ C(OP_READ) ] = {
665                 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
666                 [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
667         },
668         [ C(OP_WRITE) ] = {
669                 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
670                 [ C(RESULT_MISS)   ] = 0,
671         },
672         [ C(OP_PREFETCH) ] = {
673                 [ C(RESULT_ACCESS) ] = 0,
674                 [ C(RESULT_MISS)   ] = 0,
675         },
676  },
677  [ C(DTLB) ] = {
678         [ C(OP_READ) ] = {
679                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
680                 [ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DTLB Miss   */
681         },
682         [ C(OP_WRITE) ] = {
683                 [ C(RESULT_ACCESS) ] = 0,
684                 [ C(RESULT_MISS)   ] = 0,
685         },
686         [ C(OP_PREFETCH) ] = {
687                 [ C(RESULT_ACCESS) ] = 0,
688                 [ C(RESULT_MISS)   ] = 0,
689         },
690  },
691  [ C(ITLB) ] = {
692         [ C(OP_READ) ] = {
693                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
694                 [ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
695         },
696         [ C(OP_WRITE) ] = {
697                 [ C(RESULT_ACCESS) ] = -1,
698                 [ C(RESULT_MISS)   ] = -1,
699         },
700         [ C(OP_PREFETCH) ] = {
701                 [ C(RESULT_ACCESS) ] = -1,
702                 [ C(RESULT_MISS)   ] = -1,
703         },
704  },
705  [ C(BPU ) ] = {
706         [ C(OP_READ) ] = {
707                 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
708                 [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
709         },
710         [ C(OP_WRITE) ] = {
711                 [ C(RESULT_ACCESS) ] = -1,
712                 [ C(RESULT_MISS)   ] = -1,
713         },
714         [ C(OP_PREFETCH) ] = {
715                 [ C(RESULT_ACCESS) ] = -1,
716                 [ C(RESULT_MISS)   ] = -1,
717         },
718  },
719 };
720
721 /*
722  * AMD Performance Monitor K7 and later.
723  */
724 static const u64 amd_perfmon_event_map[] =
725 {
726   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0076,
727   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
728   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0080,
729   [PERF_COUNT_HW_CACHE_MISSES]          = 0x0081,
730   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
731   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
732 };
733
734 static u64 amd_pmu_event_map(int hw_event)
735 {
736         return amd_perfmon_event_map[hw_event];
737 }
738
739 static u64 amd_pmu_raw_event(u64 hw_event)
740 {
741 #define K7_EVNTSEL_EVENT_MASK   0x7000000FFULL
742 #define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
743 #define K7_EVNTSEL_EDGE_MASK    0x000040000ULL
744 #define K7_EVNTSEL_INV_MASK     0x000800000ULL
745 #define K7_EVNTSEL_REG_MASK     0x0FF000000ULL
746
747 #define K7_EVNTSEL_MASK                 \
748         (K7_EVNTSEL_EVENT_MASK |        \
749          K7_EVNTSEL_UNIT_MASK  |        \
750          K7_EVNTSEL_EDGE_MASK  |        \
751          K7_EVNTSEL_INV_MASK   |        \
752          K7_EVNTSEL_REG_MASK)
753
754         return hw_event & K7_EVNTSEL_MASK;
755 }
756
757 /*
758  * Propagate event elapsed time into the generic event.
759  * Can only be executed on the CPU where the event is active.
760  * Returns the new raw count.
761  */
762 static u64
763 x86_perf_event_update(struct perf_event *event,
764                         struct hw_perf_event *hwc, int idx)
765 {
766         int shift = 64 - x86_pmu.event_bits;
767         u64 prev_raw_count, new_raw_count;
768         s64 delta;
769
770         if (idx == X86_PMC_IDX_FIXED_BTS)
771                 return 0;
772
773         /*
774          * Careful: an NMI might modify the previous event value.
775          *
776          * Our tactic to handle this is to first atomically read and
777          * exchange a new raw count - then add that new-prev delta
778          * count to the generic event atomically:
779          */
780 again:
781         prev_raw_count = atomic64_read(&hwc->prev_count);
782         rdmsrl(hwc->event_base + idx, new_raw_count);
783
784         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
785                                         new_raw_count) != prev_raw_count)
786                 goto again;
787
788         /*
789          * Now we have the new raw value and have updated the prev
790          * timestamp already. We can now calculate the elapsed delta
791          * (event-)time and add that to the generic event.
792          *
793          * Careful, not all hw sign-extends above the physical width
794          * of the count.
795          */
796         delta = (new_raw_count << shift) - (prev_raw_count << shift);
797         delta >>= shift;
798
799         atomic64_add(delta, &event->count);
800         atomic64_sub(delta, &hwc->period_left);
801
802         return new_raw_count;
803 }
804
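/*
 * Worked example (illustrative): with 48-bit counters, event_bits = 48 and
 * shift = 16 above.  Shifting both the previous and the new raw count left
 * by 16 before the signed subtraction, then shifting the difference back
 * right by 16, sign-extends the 48-bit values so the delta stays correct
 * even when the hardware counter wraps, e.g.:
 *
 *	s64 delta = ((u64)0x000000000010 << 16) - ((u64)0xffffffffff00 << 16);
 *	delta >>= 16;	// 0x110 = 272 events, not a huge bogus value
 */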
805 static atomic_t active_events;
806 static DEFINE_MUTEX(pmc_reserve_mutex);
807
808 static bool reserve_pmc_hardware(void)
809 {
810 #ifdef CONFIG_X86_LOCAL_APIC
811         int i;
812
813         if (nmi_watchdog == NMI_LOCAL_APIC)
814                 disable_lapic_nmi_watchdog();
815
816         for (i = 0; i < x86_pmu.num_events; i++) {
817                 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
818                         goto perfctr_fail;
819         }
820
821         for (i = 0; i < x86_pmu.num_events; i++) {
822                 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
823                         goto eventsel_fail;
824         }
825 #endif
826
827         return true;
828
829 #ifdef CONFIG_X86_LOCAL_APIC
830 eventsel_fail:
831         for (i--; i >= 0; i--)
832                 release_evntsel_nmi(x86_pmu.eventsel + i);
833
834         i = x86_pmu.num_events;
835
836 perfctr_fail:
837         for (i--; i >= 0; i--)
838                 release_perfctr_nmi(x86_pmu.perfctr + i);
839
840         if (nmi_watchdog == NMI_LOCAL_APIC)
841                 enable_lapic_nmi_watchdog();
842
843         return false;
844 #endif
845 }
846
847 static void release_pmc_hardware(void)
848 {
849 #ifdef CONFIG_X86_LOCAL_APIC
850         int i;
851
852         for (i = 0; i < x86_pmu.num_events; i++) {
853                 release_perfctr_nmi(x86_pmu.perfctr + i);
854                 release_evntsel_nmi(x86_pmu.eventsel + i);
855         }
856
857         if (nmi_watchdog == NMI_LOCAL_APIC)
858                 enable_lapic_nmi_watchdog();
859 #endif
860 }
861
862 static inline bool bts_available(void)
863 {
864         return x86_pmu.enable_bts != NULL;
865 }
866
867 static inline void init_debug_store_on_cpu(int cpu)
868 {
869         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
870
871         if (!ds)
872                 return;
873
874         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
875                      (u32)((u64)(unsigned long)ds),
876                      (u32)((u64)(unsigned long)ds >> 32));
877 }
878
879 static inline void fini_debug_store_on_cpu(int cpu)
880 {
881         if (!per_cpu(cpu_hw_events, cpu).ds)
882                 return;
883
884         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
885 }
886
887 static void release_bts_hardware(void)
888 {
889         int cpu;
890
891         if (!bts_available())
892                 return;
893
894         get_online_cpus();
895
896         for_each_online_cpu(cpu)
897                 fini_debug_store_on_cpu(cpu);
898
899         for_each_possible_cpu(cpu) {
900                 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
901
902                 if (!ds)
903                         continue;
904
905                 per_cpu(cpu_hw_events, cpu).ds = NULL;
906
907                 kfree((void *)(unsigned long)ds->bts_buffer_base);
908                 kfree(ds);
909         }
910
911         put_online_cpus();
912 }
913
914 static int reserve_bts_hardware(void)
915 {
916         int cpu, err = 0;
917
918         if (!bts_available())
919                 return 0;
920
921         get_online_cpus();
922
923         for_each_possible_cpu(cpu) {
924                 struct debug_store *ds;
925                 void *buffer;
926
927                 err = -ENOMEM;
928                 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
929                 if (unlikely(!buffer))
930                         break;
931
932                 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
933                 if (unlikely(!ds)) {
934                         kfree(buffer);
935                         break;
936                 }
937
938                 ds->bts_buffer_base = (u64)(unsigned long)buffer;
939                 ds->bts_index = ds->bts_buffer_base;
940                 ds->bts_absolute_maximum =
941                         ds->bts_buffer_base + BTS_BUFFER_SIZE;
942                 ds->bts_interrupt_threshold =
943                         ds->bts_absolute_maximum - BTS_OVFL_TH;
944
945                 per_cpu(cpu_hw_events, cpu).ds = ds;
946                 err = 0;
947         }
948
949         if (err)
950                 release_bts_hardware();
951         else {
952                 for_each_online_cpu(cpu)
953                         init_debug_store_on_cpu(cpu);
954         }
955
956         put_online_cpus();
957
958         return err;
959 }
960
961 static void hw_perf_event_destroy(struct perf_event *event)
962 {
963         if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
964                 release_pmc_hardware();
965                 release_bts_hardware();
966                 mutex_unlock(&pmc_reserve_mutex);
967         }
968 }
969
970 static inline int x86_pmu_initialized(void)
971 {
972         return x86_pmu.handle_irq != NULL;
973 }
974
975 static inline int
976 set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
977 {
978         unsigned int cache_type, cache_op, cache_result;
979         u64 config, val;
980
981         config = attr->config;
982
983         cache_type = (config >>  0) & 0xff;
984         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
985                 return -EINVAL;
986
987         cache_op = (config >>  8) & 0xff;
988         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
989                 return -EINVAL;
990
991         cache_result = (config >> 16) & 0xff;
992         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
993                 return -EINVAL;
994
995         val = hw_cache_event_ids[cache_type][cache_op][cache_result];
996
997         if (val == 0)
998                 return -ENOENT;
999
1000         if (val == -1)
1001                 return -EINVAL;
1002
1003         hwc->config |= val;
1004
1005         return 0;
1006 }
1007
1008 static void intel_pmu_enable_bts(u64 config)
1009 {
1010         unsigned long debugctlmsr;
1011
1012         debugctlmsr = get_debugctlmsr();
1013
1014         debugctlmsr |= X86_DEBUGCTL_TR;
1015         debugctlmsr |= X86_DEBUGCTL_BTS;
1016         debugctlmsr |= X86_DEBUGCTL_BTINT;
1017
1018         if (!(config & ARCH_PERFMON_EVENTSEL_OS))
1019                 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
1020
1021         if (!(config & ARCH_PERFMON_EVENTSEL_USR))
1022                 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
1023
1024         update_debugctlmsr(debugctlmsr);
1025 }
1026
1027 static void intel_pmu_disable_bts(void)
1028 {
1029         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1030         unsigned long debugctlmsr;
1031
1032         if (!cpuc->ds)
1033                 return;
1034
1035         debugctlmsr = get_debugctlmsr();
1036
1037         debugctlmsr &=
1038                 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
1039                   X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
1040
1041         update_debugctlmsr(debugctlmsr);
1042 }
1043
1044 /*
1045  * Setup the hardware configuration for a given attr_type
1046  */
1047 static int __hw_perf_event_init(struct perf_event *event)
1048 {
1049         struct perf_event_attr *attr = &event->attr;
1050         struct hw_perf_event *hwc = &event->hw;
1051         u64 config;
1052         int err;
1053
1054         if (!x86_pmu_initialized())
1055                 return -ENODEV;
1056
1057         err = 0;
1058         if (!atomic_inc_not_zero(&active_events)) {
1059                 mutex_lock(&pmc_reserve_mutex);
1060                 if (atomic_read(&active_events) == 0) {
1061                         if (!reserve_pmc_hardware())
1062                                 err = -EBUSY;
1063                         else
1064                                 err = reserve_bts_hardware();
1065                 }
1066                 if (!err)
1067                         atomic_inc(&active_events);
1068                 mutex_unlock(&pmc_reserve_mutex);
1069         }
1070         if (err)
1071                 return err;
1072
1073         event->destroy = hw_perf_event_destroy;
1074
1075         /*
1076          * Generate PMC IRQs:
1077          * (keep 'enabled' bit clear for now)
1078          */
1079         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
1080
1081         hwc->idx = -1;
1082
1083         /*
1084          * Count user and OS events unless requested not to.
1085          */
1086         if (!attr->exclude_user)
1087                 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
1088         if (!attr->exclude_kernel)
1089                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
1090
1091         if (!hwc->sample_period) {
1092                 hwc->sample_period = x86_pmu.max_period;
1093                 hwc->last_period = hwc->sample_period;
1094                 atomic64_set(&hwc->period_left, hwc->sample_period);
1095         } else {
1096                 /*
1097                  * If we have a PMU initialized but no APIC
1098                  * interrupts, we cannot sample hardware
1099                  * events (user-space has to fall back and
1100                  * sample via a hrtimer based software event):
1101                  */
1102                 if (!x86_pmu.apic)
1103                         return -EOPNOTSUPP;
1104         }
1105
1106         /*
1107  * Raw hw_event types provide the config in the hw_event structure
1108          */
1109         if (attr->type == PERF_TYPE_RAW) {
1110                 hwc->config |= x86_pmu.raw_event(attr->config);
1111                 return 0;
1112         }
1113
1114         if (attr->type == PERF_TYPE_HW_CACHE)
1115                 return set_ext_hw_attr(hwc, attr);
1116
1117         if (attr->config >= x86_pmu.max_events)
1118                 return -EINVAL;
1119
1120         /*
1121          * The generic map:
1122          */
1123         config = x86_pmu.event_map(attr->config);
1124
1125         if (config == 0)
1126                 return -ENOENT;
1127
1128         if (config == -1LL)
1129                 return -EINVAL;
1130
1131         /*
1132          * Branch tracing:
1133          */
1134         if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
1135             (hwc->sample_period == 1)) {
1136                 /* BTS is not supported by this architecture. */
1137                 if (!bts_available())
1138                         return -EOPNOTSUPP;
1139
1140                 /* BTS is currently only allowed for user-mode. */
1141                 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1142                         return -EOPNOTSUPP;
1143         }
1144
1145         hwc->config |= config;
1146
1147         return 0;
1148 }
1149
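/*
 * Illustrative sketch (not from the original source): the branch-tracing
 * path above is selected by a hardware branch event with a sample period
 * of 1 and kernel counting excluded, roughly:
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_HARDWARE,
 *		.config		= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
 *		.sample_period	= 1,
 *		.exclude_kernel	= 1,	// BTS here is user-mode only
 *	};
 *
 * Anything else falls back to a normal sampled branch counter.
 */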
1150 static void p6_pmu_disable_all(void)
1151 {
1152         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1153         u64 val;
1154
1155         if (!cpuc->enabled)
1156                 return;
1157
1158         cpuc->enabled = 0;
1159         barrier();
1160
1161         /* p6 only has one enable register */
1162         rdmsrl(MSR_P6_EVNTSEL0, val);
1163         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1164         wrmsrl(MSR_P6_EVNTSEL0, val);
1165 }
1166
1167 static void intel_pmu_disable_all(void)
1168 {
1169         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1170
1171         if (!cpuc->enabled)
1172                 return;
1173
1174         cpuc->enabled = 0;
1175         barrier();
1176
1177         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1178
1179         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1180                 intel_pmu_disable_bts();
1181 }
1182
1183 static void amd_pmu_disable_all(void)
1184 {
1185         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1186         int idx;
1187
1188         if (!cpuc->enabled)
1189                 return;
1190
1191         cpuc->enabled = 0;
1192         /*
1193          * ensure we write the disable before we start disabling the
1194          * events proper, so that amd_pmu_enable_event() does the
1195          * right thing.
1196          */
1197         barrier();
1198
1199         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1200                 u64 val;
1201
1202                 if (!test_bit(idx, cpuc->active_mask))
1203                         continue;
1204                 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
1205                 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
1206                         continue;
1207                 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1208                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1209         }
1210 }
1211
1212 void hw_perf_disable(void)
1213 {
1214         if (!x86_pmu_initialized())
1215                 return;
1216         x86_pmu.disable_all();
1217 }
1218
1219 static void p6_pmu_enable_all(void)
1220 {
1221         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1222         unsigned long val;
1223
1224         if (cpuc->enabled)
1225                 return;
1226
1227         cpuc->enabled = 1;
1228         barrier();
1229
1230         /* p6 only has one enable register */
1231         rdmsrl(MSR_P6_EVNTSEL0, val);
1232         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1233         wrmsrl(MSR_P6_EVNTSEL0, val);
1234 }
1235
1236 static void intel_pmu_enable_all(void)
1237 {
1238         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1239
1240         if (cpuc->enabled)
1241                 return;
1242
1243         cpuc->enabled = 1;
1244         barrier();
1245
1246         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1247
1248         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1249                 struct perf_event *event =
1250                         cpuc->events[X86_PMC_IDX_FIXED_BTS];
1251
1252                 if (WARN_ON_ONCE(!event))
1253                         return;
1254
1255                 intel_pmu_enable_bts(event->hw.config);
1256         }
1257 }
1258
1259 static void amd_pmu_enable_all(void)
1260 {
1261         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1262         int idx;
1263
1264         if (cpuc->enabled)
1265                 return;
1266
1267         cpuc->enabled = 1;
1268         barrier();
1269
1270         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1271                 struct perf_event *event = cpuc->events[idx];
1272                 u64 val;
1273
1274                 if (!test_bit(idx, cpuc->active_mask))
1275                         continue;
1276
1277                 val = event->hw.config;
1278                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1279                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1280         }
1281 }
1282
1283 void hw_perf_enable(void)
1284 {
1285         if (!x86_pmu_initialized())
1286                 return;
1287         x86_pmu.enable_all();
1288 }
1289
1290 static inline u64 intel_pmu_get_status(void)
1291 {
1292         u64 status;
1293
1294         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1295
1296         return status;
1297 }
1298
1299 static inline void intel_pmu_ack_status(u64 ack)
1300 {
1301         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
1302 }
1303
1304 static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1305 {
1306         (void)checking_wrmsrl(hwc->config_base + idx,
1307                               hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1308 }
1309
1310 static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1311 {
1312         (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1313 }
1314
1315 static inline void
1316 intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
1317 {
1318         int idx = __idx - X86_PMC_IDX_FIXED;
1319         u64 ctrl_val, mask;
1320
1321         mask = 0xfULL << (idx * 4);
1322
1323         rdmsrl(hwc->config_base, ctrl_val);
1324         ctrl_val &= ~mask;
1325         (void)checking_wrmsrl(hwc->config_base, ctrl_val);
1326 }
1327
1328 static inline void
1329 p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1330 {
1331         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1332         u64 val = P6_NOP_EVENT;
1333
1334         if (cpuc->enabled)
1335                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1336
1337         (void)checking_wrmsrl(hwc->config_base + idx, val);
1338 }
1339
1340 static inline void
1341 intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1342 {
1343         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1344                 intel_pmu_disable_bts();
1345                 return;
1346         }
1347
1348         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1349                 intel_pmu_disable_fixed(hwc, idx);
1350                 return;
1351         }
1352
1353         x86_pmu_disable_event(hwc, idx);
1354 }
1355
1356 static inline void
1357 amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1358 {
1359         x86_pmu_disable_event(hwc, idx);
1360 }
1361
1362 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1363
1364 /*
1365  * Set the next IRQ period, based on the hwc->period_left value.
1366  * To be called with the event disabled in hw:
1367  */
1368 static int
1369 x86_perf_event_set_period(struct perf_event *event,
1370                              struct hw_perf_event *hwc, int idx)
1371 {
1372         s64 left = atomic64_read(&hwc->period_left);
1373         s64 period = hwc->sample_period;
1374         int err, ret = 0;
1375
1376         if (idx == X86_PMC_IDX_FIXED_BTS)
1377                 return 0;
1378
1379         /*
1380          * If we are way outside a reasonable range then just skip forward:
1381          */
1382         if (unlikely(left <= -period)) {
1383                 left = period;
1384                 atomic64_set(&hwc->period_left, left);
1385                 hwc->last_period = period;
1386                 ret = 1;
1387         }
1388
1389         if (unlikely(left <= 0)) {
1390                 left += period;
1391                 atomic64_set(&hwc->period_left, left);
1392                 hwc->last_period = period;
1393                 ret = 1;
1394         }
1395         /*
1396          * Quirk: certain CPUs don't like it if just 1 hw_event is left:
1397          */
1398         if (unlikely(left < 2))
1399                 left = 2;
1400
1401         if (left > x86_pmu.max_period)
1402                 left = x86_pmu.max_period;
1403
1404         per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1405
1406         /*
1407          * The hw event starts counting from this event offset,
1408          * mark it to be able to extract future deltas:
1409          */
1410         atomic64_set(&hwc->prev_count, (u64)-left);
1411
1412         err = checking_wrmsrl(hwc->event_base + idx,
1413                              (u64)(-left) & x86_pmu.event_mask);
1414
1415         perf_event_update_userpage(event);
1416
1417         return ret;
1418 }
1419
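/*
 * Worked example (illustrative): the counter is programmed with -left,
 * truncated to x86_pmu.event_mask, so it overflows and raises the PMI
 * after exactly 'left' increments.  Assuming 48-bit counters
 * (event_mask == (1ULL << 48) - 1) and left = 1000, the MSR is written
 * with
 *
 *	(u64)(-1000) & ((1ULL << 48) - 1) == 0xfffffffffc18
 *
 * i.e. 2^48 - 1000.
 */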
1420 static inline void
1421 intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1422 {
1423         int idx = __idx - X86_PMC_IDX_FIXED;
1424         u64 ctrl_val, bits, mask;
1425         int err;
1426
1427         /*
1428          * Enable IRQ generation (0x8),
1429          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1430          * if requested:
1431          */
1432         bits = 0x8ULL;
1433         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1434                 bits |= 0x2;
1435         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1436                 bits |= 0x1;
1437
1438         /*
1439          * ANY bit is supported in v3 and up
1440          */
1441         if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
1442                 bits |= 0x4;
1443
1444         bits <<= (idx * 4);
1445         mask = 0xfULL << (idx * 4);
1446
1447         rdmsrl(hwc->config_base, ctrl_val);
1448         ctrl_val &= ~mask;
1449         ctrl_val |= bits;
1450         err = checking_wrmsrl(hwc->config_base, ctrl_val);
1451 }
1452
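/*
 * Worked example (illustrative): MSR_ARCH_PERFMON_FIXED_CTR_CTRL packs one
 * 4-bit control nibble per fixed counter.  Enabling fixed counter 1 for
 * both user and kernel counting with PMI generation builds
 *
 *	bits = 0x8 | 0x2 | 0x1 = 0xb;   bits <<= 4;   mask = 0xf0;
 *
 * so only that counter's nibble of ctrl_val is rewritten.
 */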
1453 static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1454 {
1455         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1456         u64 val;
1457
1458         val = hwc->config;
1459         if (cpuc->enabled)
1460                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1461
1462         (void)checking_wrmsrl(hwc->config_base + idx, val);
1463 }
1464
1465
1466 static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1467 {
1468         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1469                 if (!__get_cpu_var(cpu_hw_events).enabled)
1470                         return;
1471
1472                 intel_pmu_enable_bts(hwc->config);
1473                 return;
1474         }
1475
1476         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1477                 intel_pmu_enable_fixed(hwc, idx);
1478                 return;
1479         }
1480
1481         x86_pmu_enable_event(hwc, idx);
1482 }
1483
1484 static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1485 {
1486         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1487
1488         if (cpuc->enabled)
1489                 x86_pmu_enable_event(hwc, idx);
1490 }
1491
1492 static int fixed_mode_idx(struct hw_perf_event *hwc)
1493 {
1494         unsigned int hw_event;
1495
1496         hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1497
1498         if (unlikely((hw_event ==
1499                       x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
1500                      (hwc->sample_period == 1)))
1501                 return X86_PMC_IDX_FIXED_BTS;
1502
1503         if (!x86_pmu.num_events_fixed)
1504                 return -1;
1505
1506         /*
1507          * fixed counters do not take all possible filters
1508          */
1509         if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
1510                 return -1;
1511
1512         if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1513                 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1514         if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1515                 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1516         if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1517                 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1518
1519         return -1;
1520 }
1521
1522 /*
1523  * generic counter allocator: get next free counter
1524  */
1525 static int
1526 gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1527 {
1528         int idx;
1529
1530         idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
1531         return idx == x86_pmu.num_events ? -1 : idx;
1532 }
1533
1534 /*
1535  * intel-specific counter allocator: check event constraints
1536  */
1537 static int
1538 intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1539 {
1540         const struct event_constraint *event_constraint;
1541         int i, code;
1542
1543         if (!event_constraints)
1544                 goto skip;
1545
1546         code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
1547
1548         for_each_event_constraint(event_constraint, event_constraints) {
1549                 if (code == event_constraint->code) {
1550                         for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
1551                                 if (!test_and_set_bit(i, cpuc->used_mask))
1552                                         return i;
1553                         }
1554                         return -1;
1555                 }
1556         }
1557 skip:
1558         return gen_get_event_idx(cpuc, hwc);
1559 }
1560
1561 static int
1562 x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1563 {
1564         int idx;
1565
1566         idx = fixed_mode_idx(hwc);
1567         if (idx == X86_PMC_IDX_FIXED_BTS) {
1568                 /* BTS is already occupied. */
1569                 if (test_and_set_bit(idx, cpuc->used_mask))
1570                         return -EAGAIN;
1571
1572                 hwc->config_base        = 0;
1573                 hwc->event_base         = 0;
1574                 hwc->idx                = idx;
1575         } else if (idx >= 0) {
1576                 /*
1577                  * Try to get the fixed event, if that is already taken
1578                  * then try to get a generic event:
1579                  */
1580                 if (test_and_set_bit(idx, cpuc->used_mask))
1581                         goto try_generic;
1582
1583                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1584                 /*
1585                  * We set it so that event_base + idx in wrmsr/rdmsr maps to
1586                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1587                  */
1588                 hwc->event_base =
1589                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1590                 hwc->idx = idx;
1591         } else {
1592                 idx = hwc->idx;
1593                 /* Try to get the previous generic event again */
1594                 if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
1595 try_generic:
1596                         idx = x86_pmu.get_event_idx(cpuc, hwc);
1597                         if (idx == -1)
1598                                 return -EAGAIN;
1599
1600                         set_bit(idx, cpuc->used_mask);
1601                         hwc->idx = idx;
1602                 }
1603                 hwc->config_base = x86_pmu.eventsel;
1604                 hwc->event_base  = x86_pmu.perfctr;
1605         }
1606
1607         return idx;
1608 }
1609
1610 /*
1611  * Find a PMC slot for the freshly enabled / scheduled in event:
1612  */
1613 static int x86_pmu_enable(struct perf_event *event)
1614 {
1615         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1616         struct hw_perf_event *hwc = &event->hw;
1617         int idx;
1618
1619         idx = x86_schedule_event(cpuc, hwc);
1620         if (idx < 0)
1621                 return idx;
1622
1623         perf_events_lapic_init();
1624
1625         x86_pmu.disable(hwc, idx);
1626
1627         cpuc->events[idx] = event;
1628         set_bit(idx, cpuc->active_mask);
1629
1630         x86_perf_event_set_period(event, hwc, idx);
1631         x86_pmu.enable(hwc, idx);
1632
1633         perf_event_update_userpage(event);
1634
1635         return 0;
1636 }
1637
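     /*
      * Re-enable an event that the overflow handler throttled; it still owns
      * its counter, so simply switch the counter back on.
      */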
1638 static void x86_pmu_unthrottle(struct perf_event *event)
1639 {
1640         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1641         struct hw_perf_event *hwc = &event->hw;
1642
1643         if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1644                                 cpuc->events[hwc->idx] != event))
1645                 return;
1646
1647         x86_pmu.enable(hwc, hwc->idx);
1648 }
1649
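     /*
      * Dump the PMU state of the current CPU: the global control, status,
      * overflow and fixed-counter control MSRs (on v2+), plus every generic
      * and fixed counter.
      */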
1650 void perf_event_print_debug(void)
1651 {
1652         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1653         struct cpu_hw_events *cpuc;
1654         unsigned long flags;
1655         int cpu, idx;
1656
1657         if (!x86_pmu.num_events)
1658                 return;
1659
1660         local_irq_save(flags);
1661
1662         cpu = smp_processor_id();
1663         cpuc = &per_cpu(cpu_hw_events, cpu);
1664
1665         if (x86_pmu.version >= 2) {
1666                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1667                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1668                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1669                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1670
1671                 pr_info("\n");
1672                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
1673                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
1674                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
1675                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
1676         }
1677         pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
1678
1679         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1680                 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1681                 rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
1682
1683                 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1684
1685                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
1686                         cpu, idx, pmc_ctrl);
1687                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
1688                         cpu, idx, pmc_count);
1689                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
1690                         cpu, idx, prev_left);
1691         }
1692         for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1693                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1694
1695                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1696                         cpu, idx, pmc_count);
1697         }
1698         local_irq_restore(flags);
1699 }
1700
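     /*
      * Flush the BTS buffer: every record is three u64s (from, to, flags).
      * The records between the buffer base and the current index are turned
      * into perf samples, with 'from' as the sample ip and 'to' as the
      * sample addr, and the index is rewound to the base.
      */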
1701 static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1702 {
1703         struct debug_store *ds = cpuc->ds;
1704         struct bts_record {
1705                 u64     from;
1706                 u64     to;
1707                 u64     flags;
1708         };
1709         struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1710         struct bts_record *at, *top;
1711         struct perf_output_handle handle;
1712         struct perf_event_header header;
1713         struct perf_sample_data data;
1714         struct pt_regs regs;
1715
1716         if (!event)
1717                 return;
1718
1719         if (!ds)
1720                 return;
1721
1722         at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1723         top = (struct bts_record *)(unsigned long)ds->bts_index;
1724
1725         if (top <= at)
1726                 return;
1727
1728         ds->bts_index = ds->bts_buffer_base;
1729
1730         perf_sample_data_init(&data, 0);
1731
1732         data.period     = event->hw.last_period;
1733         regs.ip         = 0;
1734
1735         /*
1736          * Prepare a generic sample, i.e. fill in the invariant fields.
1737          * We will overwrite the from and to address before we output
1738          * the sample.
1739          */
1740         perf_prepare_sample(&header, &data, event, &regs);
1741
1742         if (perf_output_begin(&handle, event,
1743                               header.size * (top - at), 1, 1))
1744                 return;
1745
1746         for (; at < top; at++) {
1747                 data.ip         = at->from;
1748                 data.addr       = at->to;
1749
1750                 perf_output_sample(&handle, &header, &data, event);
1751         }
1752
1753         perf_output_end(&handle);
1754
1755         /* There's new data available. */
1756         event->hw.interrupts++;
1757         event->pending_kill = POLL_IN;
1758 }
1759
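     /*
      * Remove @event from its counter: stop the counter, fold the remaining
      * count into the event, drain BTS if this was the BTS slot and release
      * the counter index.
      */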
1760 static void x86_pmu_disable(struct perf_event *event)
1761 {
1762         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1763         struct hw_perf_event *hwc = &event->hw;
1764         int idx = hwc->idx;
1765
1766         /*
1767          * Must be done before we disable, otherwise the NMI handler
1768          * could re-enable it again:
1769          */
1770         clear_bit(idx, cpuc->active_mask);
1771         x86_pmu.disable(hwc, idx);
1772
1773         /*
1774          * Make sure the cleared pointer becomes visible before we
1775          * (potentially) free the event:
1776          */
1777         barrier();
1778
1779         /*
1780          * Drain the remaining delta count out of an event
1781          * that we are disabling:
1782          */
1783         x86_perf_event_update(event, hwc, idx);
1784
1785         /* Drain the remaining BTS records. */
1786         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1787                 intel_pmu_drain_bts_buffer(cpuc);
1788
1789         cpuc->events[idx] = NULL;
1790         clear_bit(idx, cpuc->used_mask);
1791
1792         perf_event_update_userpage(event);
1793 }
1794
1795 /*
1796  * Save and restart an expired event. Called from NMI context,
1797  * so it has to be careful about preempting normal event ops:
1798  */
1799 static int intel_pmu_save_and_restart(struct perf_event *event)
1800 {
1801         struct hw_perf_event *hwc = &event->hw;
1802         int idx = hwc->idx;
1803         int ret;
1804
1805         x86_perf_event_update(event, hwc, idx);
1806         ret = x86_perf_event_set_period(event, hwc, idx);
1807
1808         if (event->state == PERF_EVENT_STATE_ACTIVE)
1809                 intel_pmu_enable_event(hwc, idx);
1810
1811         return ret;
1812 }
1813
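     /*
      * Last-resort recovery used when the overflow handler loops without
      * making progress: zero all generic and fixed counters and rewind the
      * BTS buffer index.
      */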
1814 static void intel_pmu_reset(void)
1815 {
1816         struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1817         unsigned long flags;
1818         int idx;
1819
1820         if (!x86_pmu.num_events)
1821                 return;
1822
1823         local_irq_save(flags);
1824
1825         pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
1826
1827         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1828                 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1829                 checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
1830         }
1831         for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1832                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1833         }
1834         if (ds)
1835                 ds->bts_index = ds->bts_buffer_base;
1836
1837         local_irq_restore(flags);
1838 }
1839
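     /*
      * The P6 PMU has no global overflow status register, so poll every
      * active counter. Counters are programmed with -(sample period); a
      * cleared sign bit means the counter wrapped, i.e. the event overflowed.
      */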
1840 static int p6_pmu_handle_irq(struct pt_regs *regs)
1841 {
1842         struct perf_sample_data data;
1843         struct cpu_hw_events *cpuc;
1844         struct perf_event *event;
1845         struct hw_perf_event *hwc;
1846         int idx, handled = 0;
1847         u64 val;
1848
1849         perf_sample_data_init(&data, 0);
1850
1851         cpuc = &__get_cpu_var(cpu_hw_events);
1852
1853         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1854                 if (!test_bit(idx, cpuc->active_mask))
1855                         continue;
1856
1857                 event = cpuc->events[idx];
1858                 hwc = &event->hw;
1859
1860                 val = x86_perf_event_update(event, hwc, idx);
1861                 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1862                         continue;
1863
1864                 /*
1865                  * event overflow
1866                  */
1867                 handled         = 1;
1868                 data.period     = event->hw.last_period;
1869
1870                 if (!x86_perf_event_set_period(event, hwc, idx))
1871                         continue;
1872
1873                 if (perf_event_overflow(event, 1, &data, regs))
1874                         p6_pmu_disable_event(hwc, idx);
1875         }
1876
1877         if (handled)
1878                 inc_irq_stat(apic_perf_irqs);
1879
1880         return handled;
1881 }
1882
1883 /*
1884  * This handler is triggered by the local APIC, so the APIC IRQ handling
1885  * rules apply:
1886  */
1887 static int intel_pmu_handle_irq(struct pt_regs *regs)
1888 {
1889         struct perf_sample_data data;
1890         struct cpu_hw_events *cpuc;
1891         int bit, loops;
1892         u64 ack, status;
1893
1894         perf_sample_data_init(&data, 0);
1895
1896         cpuc = &__get_cpu_var(cpu_hw_events);
1897
1898         perf_disable();
1899         intel_pmu_drain_bts_buffer(cpuc);
1900         status = intel_pmu_get_status();
1901         if (!status) {
1902                 perf_enable();
1903                 return 0;
1904         }
1905
1906         loops = 0;
1907 again:
1908         if (++loops > 100) {
1909                 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1910                 perf_event_print_debug();
1911                 intel_pmu_reset();
1912                 perf_enable();
1913                 return 1;
1914         }
1915
1916         inc_irq_stat(apic_perf_irqs);
1917         ack = status;
1918         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1919                 struct perf_event *event = cpuc->events[bit];
1920
1921                 clear_bit(bit, (unsigned long *) &status);
1922                 if (!test_bit(bit, cpuc->active_mask))
1923                         continue;
1924
1925                 if (!intel_pmu_save_and_restart(event))
1926                         continue;
1927
1928                 data.period = event->hw.last_period;
1929
1930                 if (perf_event_overflow(event, 1, &data, regs))
1931                         intel_pmu_disable_event(&event->hw, bit);
1932         }
1933
1934         intel_pmu_ack_status(ack);
1935
1936         /*
1937          * Repeat if there is more work to be done:
1938          */
1939         status = intel_pmu_get_status();
1940         if (status)
1941                 goto again;
1942
1943         perf_enable();
1944
1945         return 1;
1946 }
1947
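     /*
      * Like the P6 path: there is no global overflow status register to
      * consult, so poll each active counter and check its sign bit.
      */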
1948 static int amd_pmu_handle_irq(struct pt_regs *regs)
1949 {
1950         struct perf_sample_data data;
1951         struct cpu_hw_events *cpuc;
1952         struct perf_event *event;
1953         struct hw_perf_event *hwc;
1954         int idx, handled = 0;
1955         u64 val;
1956
1957         perf_sample_data_init(&data, 0);
1959
1960         cpuc = &__get_cpu_var(cpu_hw_events);
1961
1962         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1963                 if (!test_bit(idx, cpuc->active_mask))
1964                         continue;
1965
1966                 event = cpuc->events[idx];
1967                 hwc = &event->hw;
1968
1969                 val = x86_perf_event_update(event, hwc, idx);
1970                 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1971                         continue;
1972
1973                 /*
1974                  * event overflow
1975                  */
1976                 handled         = 1;
1977                 data.period     = event->hw.last_period;
1978
1979                 if (!x86_perf_event_set_period(event, hwc, idx))
1980                         continue;
1981
1982                 if (perf_event_overflow(event, 1, &data, regs))
1983                         amd_pmu_disable_event(hwc, idx);
1984         }
1985
1986         if (handled)
1987                 inc_irq_stat(apic_perf_irqs);
1988
1989         return handled;
1990 }
1991
1992 void smp_perf_pending_interrupt(struct pt_regs *regs)
1993 {
1994         irq_enter();
1995         ack_APIC_irq();
1996         inc_irq_stat(apic_pending_irqs);
1997         perf_event_do_pending();
1998         irq_exit();
1999 }
2000
2001 void set_perf_event_pending(void)
2002 {
2003 #ifdef CONFIG_X86_LOCAL_APIC
2004         if (!x86_pmu.apic || !x86_pmu_initialized())
2005                 return;
2006
2007         apic->send_IPI_self(LOCAL_PENDING_VECTOR);
2008 #endif
2009 }
2010
2011 void perf_events_lapic_init(void)
2012 {
2013 #ifdef CONFIG_X86_LOCAL_APIC
2014         if (!x86_pmu.apic || !x86_pmu_initialized())
2015                 return;
2016
2017         /*
2018          * Always use NMI for PMU
2019          */
2020         apic_write(APIC_LVTPC, APIC_DM_NMI);
2021 #endif
2022 }
2023
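     /*
      * NMI entry point: bail out cheaply when no events are active,
      * otherwise let the PMU driver handle it. NOTIFY_STOP is returned
      * unconditionally, see the back-to-back NMI comment below.
      */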
2024 static int __kprobes
2025 perf_event_nmi_handler(struct notifier_block *self,
2026                          unsigned long cmd, void *__args)
2027 {
2028         struct die_args *args = __args;
2029         struct pt_regs *regs;
2030
2031         if (!atomic_read(&active_events))
2032                 return NOTIFY_DONE;
2033
2034         switch (cmd) {
2035         case DIE_NMI:
2036         case DIE_NMI_IPI:
2037                 break;
2038
2039         default:
2040                 return NOTIFY_DONE;
2041         }
2042
2043         regs = args->regs;
2044
2045 #ifdef CONFIG_X86_LOCAL_APIC
2046         apic_write(APIC_LVTPC, APIC_DM_NMI);
2047 #endif
2048         /*
2049          * Can't rely on the handled return value to say it was our NMI, two
2050          * events could trigger 'simultaneously' raising two back-to-back NMIs.
2051          *
2052          * If the first NMI handles both, the latter will be empty and daze
2053          * the CPU.
2054          */
2055         x86_pmu.handle_irq(regs);
2056
2057         return NOTIFY_STOP;
2058 }
2059
2060 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
2061         .notifier_call          = perf_event_nmi_handler,
2062         .next                   = NULL,
2063         .priority               = 1
2064 };
2065
2066 static __initconst struct x86_pmu p6_pmu = {
2067         .name                   = "p6",
2068         .handle_irq             = p6_pmu_handle_irq,
2069         .disable_all            = p6_pmu_disable_all,
2070         .enable_all             = p6_pmu_enable_all,
2071         .enable                 = p6_pmu_enable_event,
2072         .disable                = p6_pmu_disable_event,
2073         .eventsel               = MSR_P6_EVNTSEL0,
2074         .perfctr                = MSR_P6_PERFCTR0,
2075         .event_map              = p6_pmu_event_map,
2076         .raw_event              = p6_pmu_raw_event,
2077         .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
2078         .apic                   = 1,
2079         .max_period             = (1ULL << 31) - 1,
2080         .version                = 0,
2081         .num_events             = 2,
2082         /*
2083          * Events have 40 bits implemented. However, they are designed such
2084          * that bits [32-39] are sign extensions of bit 31. As such the
2085          * effective width of an event for P6-like PMUs is 32 bits only.
2086          *
2087          * See the IA-32 Intel Architecture Software Developer's Manual, Vol 3B.
2088          */
2089         .event_bits             = 32,
2090         .event_mask             = (1ULL << 32) - 1,
2091         .get_event_idx          = intel_get_event_idx,
2092 };
2093
2094 static __initconst struct x86_pmu intel_pmu = {
2095         .name                   = "Intel",
2096         .handle_irq             = intel_pmu_handle_irq,
2097         .disable_all            = intel_pmu_disable_all,
2098         .enable_all             = intel_pmu_enable_all,
2099         .enable                 = intel_pmu_enable_event,
2100         .disable                = intel_pmu_disable_event,
2101         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
2102         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
2103         .event_map              = intel_pmu_event_map,
2104         .raw_event              = intel_pmu_raw_event,
2105         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
2106         .apic                   = 1,
2107         /*
2108          * Intel PMCs cannot be accessed sanely above 32 bit width,
2109          * so we install an artificial 1<<31 period regardless of
2110          * the generic event period:
2111          */
2112         .max_period             = (1ULL << 31) - 1,
2113         .enable_bts             = intel_pmu_enable_bts,
2114         .disable_bts            = intel_pmu_disable_bts,
2115         .get_event_idx          = intel_get_event_idx,
2116 };
2117
2118 static __initconst struct x86_pmu amd_pmu = {
2119         .name                   = "AMD",
2120         .handle_irq             = amd_pmu_handle_irq,
2121         .disable_all            = amd_pmu_disable_all,
2122         .enable_all             = amd_pmu_enable_all,
2123         .enable                 = amd_pmu_enable_event,
2124         .disable                = amd_pmu_disable_event,
2125         .eventsel               = MSR_K7_EVNTSEL0,
2126         .perfctr                = MSR_K7_PERFCTR0,
2127         .event_map              = amd_pmu_event_map,
2128         .raw_event              = amd_pmu_raw_event,
2129         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
2130         .num_events             = 4,
2131         .event_bits             = 48,
2132         .event_mask             = (1ULL << 48) - 1,
2133         .apic                   = 1,
2134         /* use highest bit to detect overflow */
2135         .max_period             = (1ULL << 47) - 1,
2136         .get_event_idx          = gen_get_event_idx,
2137 };
2138
2139 static __init int p6_pmu_init(void)
2140 {
2141         switch (boot_cpu_data.x86_model) {
2142         case 1:
2143         case 3:  /* Pentium Pro */
2144         case 5:
2145         case 6:  /* Pentium II */
2146         case 7:
2147         case 8:
2148         case 11: /* Pentium III */
2149                 event_constraints = intel_p6_event_constraints;
2150                 break;
2151         case 9:
2152         case 13:
2153                 /* Pentium M */
2154                 event_constraints = intel_p6_event_constraints;
2155                 break;
2156         default:
2157                 pr_cont("unsupported p6 CPU model %d ",
2158                         boot_cpu_data.x86_model);
2159                 return -ENODEV;
2160         }
2161
2162         x86_pmu = p6_pmu;
2163
2164         return 0;
2165 }
2166
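     /*
      * Probe the Intel PMU: fall back to the P6 driver on family 6 parts
      * without architectural perfmon, require perfmon version 2+, read the
      * counter geometry from CPUID leaf 0xA and select the hw-cache table
      * and event constraints per model.
      */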
2167 static __init int intel_pmu_init(void)
2168 {
2169         union cpuid10_edx edx;
2170         union cpuid10_eax eax;
2171         unsigned int unused;
2172         unsigned int ebx;
2173         int version;
2174
2175         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2176                 /* check for P6 processor family */
2177                 if (boot_cpu_data.x86 == 6) {
2178                         return p6_pmu_init();
2179                 } else {
2180                         return -ENODEV;
2181                 }
2182         }
2183
2184         /*
2185          * Check whether the Architectural PerfMon supports the
2186          * Branch Misses Retired hw_event or not.
2187          */
2188         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2189         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2190                 return -ENODEV;
2191
2192         version = eax.split.version_id;
2193         if (version < 2)
2194                 return -ENODEV;
2195
2196         x86_pmu                         = intel_pmu;
2197         x86_pmu.version                 = version;
2198         x86_pmu.num_events              = eax.split.num_events;
2199         x86_pmu.event_bits              = eax.split.bit_width;
2200         x86_pmu.event_mask              = (1ULL << eax.split.bit_width) - 1;
2201
2202         /*
2203          * Quirk: v2 perfmon does not report fixed-purpose events, so
2204          * assume at least 3 events:
2205          */
2206         x86_pmu.num_events_fixed        = max((int)edx.split.num_events_fixed, 3);
2207
2208         /*
2209          * Install the hw-cache-events table:
2210          */
2211         switch (boot_cpu_data.x86_model) {
2212
2213         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2214         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2215         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2216         case 29: /* six-core 45 nm xeon "Dunnington" */
2217                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2218                        sizeof(hw_cache_event_ids));
2219
2220                 pr_cont("Core2 events, ");
2221                 event_constraints = intel_core_event_constraints;
2222                 break;
2223         default:
2224         case 26: /* 45 nm nehalem, "Bloomfield" */
2225         case 30: /* 45 nm nehalem, "Lynnfield" */
2226         case 46: /* 45 nm nehalem-ex, "Beckton" */
2227                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2228                        sizeof(hw_cache_event_ids));
2229
2230                 event_constraints = intel_nehalem_event_constraints;
2231                 pr_cont("Nehalem/Corei7 events, ");
2232                 break;
2233         case 28:
2234                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2235                        sizeof(hw_cache_event_ids));
2236
2237                 pr_cont("Atom events, ");
2238                 break;
2239
2240         case 37: /* 32 nm nehalem, "Clarkdale" */
2241         case 44: /* 32 nm nehalem, "Gulftown" */
2242                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
2243                        sizeof(hw_cache_event_ids));
2244
2245                 pr_cont("Westmere events, ");
2246                 break;
2247         }
2248         return 0;
2249 }
2250
2251 static __init int amd_pmu_init(void)
2252 {
2253         /* Performance-monitoring supported from K7 and later: */
2254         if (boot_cpu_data.x86 < 6)
2255                 return -ENODEV;
2256
2257         x86_pmu = amd_pmu;
2258
2259         /* Events are common for all AMDs */
2260         memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2261                sizeof(hw_cache_event_ids));
2262
2263         return 0;
2264 }
2265
2266 static void __init pmu_check_apic(void)
2267 {
2268         if (cpu_has_apic)
2269                 return;
2270
2271         x86_pmu.apic = 0;
2272         pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
2273         pr_info("no hardware sampling interrupt available.\n");
2274 }
2275
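     /*
      * Boot-time setup: probe the vendor PMU, clip the counter counts to the
      * compile-time maxima, build perf_event_mask from the generic and fixed
      * counters and register the LAPIC/NMI plumbing.
      */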
2276 void __init init_hw_perf_events(void)
2277 {
2278         int err;
2279
2280         pr_info("Performance Events: ");
2281
2282         switch (boot_cpu_data.x86_vendor) {
2283         case X86_VENDOR_INTEL:
2284                 err = intel_pmu_init();
2285                 break;
2286         case X86_VENDOR_AMD:
2287                 err = amd_pmu_init();
2288                 break;
2289         default:
2290                 return;
2291         }
2292         if (err != 0) {
2293                 pr_cont("no PMU driver, software events only.\n");
2294                 return;
2295         }
2296
2297         pmu_check_apic();
2298
2299         pr_cont("%s PMU driver.\n", x86_pmu.name);
2300
2301         if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
2302                 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
2303                      x86_pmu.num_events, X86_PMC_MAX_GENERIC);
2304                 x86_pmu.num_events = X86_PMC_MAX_GENERIC;
2305         }
2306         perf_event_mask = (1 << x86_pmu.num_events) - 1;
2307         perf_max_events = x86_pmu.num_events;
2308
2309         if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
2310                 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
2311                      x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
2312                 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
2313         }
2314
2315         perf_event_mask |=
2316                 ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
2317         x86_pmu.intel_ctrl = perf_event_mask;
2318
2319         perf_events_lapic_init();
2320         register_die_notifier(&perf_event_nmi_notifier);
2321
2322         pr_info("... version:                %d\n",     x86_pmu.version);
2323         pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
2324         pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
2325         pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
2326         pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
2327         pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
2328         pr_info("... event mask:             %016Lx\n", perf_event_mask);
2329 }
2330
2331 static inline void x86_pmu_read(struct perf_event *event)
2332 {
2333         x86_perf_event_update(event, &event->hw, event->hw.idx);
2334 }
2335
2336 static const struct pmu pmu = {
2337         .enable         = x86_pmu_enable,
2338         .disable        = x86_pmu_disable,
2339         .read           = x86_pmu_read,
2340         .unthrottle     = x86_pmu_unthrottle,
2341 };
2342
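     /*
      * Group validation: dry-run the scheduling of the leader, all siblings
      * and the new event on a zeroed fake cpu_hw_events to make sure the
      * whole group fits on the PMU at the same time.
      */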
2343 static int
2344 validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
2345 {
2346         struct hw_perf_event fake_event = event->hw;
2347
2348         if (event->pmu && event->pmu != &pmu)
2349                 return 0;
2350
2351         return x86_schedule_event(cpuc, &fake_event) >= 0;
2352 }
2353
2354 static int validate_group(struct perf_event *event)
2355 {
2356         struct perf_event *sibling, *leader = event->group_leader;
2357         struct cpu_hw_events fake_pmu;
2358
2359         memset(&fake_pmu, 0, sizeof(fake_pmu));
2360
2361         if (!validate_event(&fake_pmu, leader))
2362                 return -ENOSPC;
2363
2364         list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
2365                 if (!validate_event(&fake_pmu, sibling))
2366                         return -ENOSPC;
2367         }
2368
2369         if (!validate_event(&fake_pmu, event))
2370                 return -ENOSPC;
2371
2372         return 0;
2373 }
2374
2375 const struct pmu *hw_perf_event_init(struct perf_event *event)
2376 {
2377         int err;
2378
2379         err = __hw_perf_event_init(event);
2380         if (!err) {
2381                 if (event->group_leader != event)
2382                         err = validate_group(event);
2383         }
2384         if (err) {
2385                 if (event->destroy)
2386                         event->destroy(event);
2387                 return ERR_PTR(err);
2388         }
2389
2390         return &pmu;
2391 }
2392
2393 /*
2394  * callchain support
2395  */
2396
2397 static inline
2398 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2399 {
2400         if (entry->nr < PERF_MAX_STACK_DEPTH)
2401                 entry->ip[entry->nr++] = ip;
2402 }
2403
2404 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2405 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2406 static DEFINE_PER_CPU(int, in_ignored_frame);
2407
2408
2409 static void
2410 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
2411 {
2412         /* Ignore warnings */
2413 }
2414
2415 static void backtrace_warning(void *data, char *msg)
2416 {
2417         /* Ignore warnings */
2418 }
2419
2420 static int backtrace_stack(void *data, char *name)
2421 {
2422         per_cpu(in_ignored_frame, smp_processor_id()) =
2423                         x86_is_stack_id(NMI_STACK, name) ||
2424                         x86_is_stack_id(DEBUG_STACK, name);
2425
2426         return 0;
2427 }
2428
2429 static void backtrace_address(void *data, unsigned long addr, int reliable)
2430 {
2431         struct perf_callchain_entry *entry = data;
2432
2433         if (per_cpu(in_ignored_frame, smp_processor_id()))
2434                 return;
2435
2436         if (reliable)
2437                 callchain_store(entry, addr);
2438 }
2439
2440 static const struct stacktrace_ops backtrace_ops = {
2441         .warning                = backtrace_warning,
2442         .warning_symbol         = backtrace_warning_symbol,
2443         .stack                  = backtrace_stack,
2444         .address                = backtrace_address,
2445         .walk_stack             = print_context_stack_bp,
2446 };
2447
2448 #include "../dumpstack.h"
2449
2450 static void
2451 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
2452 {
2453         callchain_store(entry, PERF_CONTEXT_KERNEL);
2454         callchain_store(entry, regs->ip);
2455
2456         dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
2457 }
2458
2459 /*
2460  * best-effort, GUP-based copy_from_user() that assumes IRQ or NMI context
2461  */
2462 static unsigned long
2463 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
2464 {
2465         unsigned long offset, addr = (unsigned long)from;
2466         int type = in_nmi() ? KM_NMI : KM_IRQ0;
2467         unsigned long size, len = 0;
2468         struct page *page;
2469         void *map;
2470         int ret;
2471
2472         do {
2473                 ret = __get_user_pages_fast(addr, 1, 0, &page);
2474                 if (!ret)
2475                         break;
2476
2477                 offset = addr & (PAGE_SIZE - 1);
2478                 size = min(PAGE_SIZE - offset, n - len);
2479
2480                 map = kmap_atomic(page, type);
2481                 memcpy(to, map+offset, size);
2482                 kunmap_atomic(map, type);
2483                 put_page(page);
2484
2485                 len  += size;
2486                 to   += size;
2487                 addr += size;
2488
2489         } while (len < n);
2490
2491         return len;
2492 }
2493
2494 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
2495 {
2496         unsigned long bytes;
2497
2498         bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
2499
2500         return bytes == sizeof(*frame);
2501 }
2502
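     /*
      * Walk the user stack through the frame-pointer chain, copying each
      * stack_frame with the NMI-safe copier above; stop on a failed copy, a
      * frame below the current stack pointer, or when the entry is full.
      */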
2503 static void
2504 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
2505 {
2506         struct stack_frame frame;
2507         const void __user *fp;
2508
2509         if (!user_mode(regs))
2510                 regs = task_pt_regs(current);
2511
2512         fp = (void __user *)regs->bp;
2513
2514         callchain_store(entry, PERF_CONTEXT_USER);
2515         callchain_store(entry, regs->ip);
2516
2517         while (entry->nr < PERF_MAX_STACK_DEPTH) {
2518                 frame.next_frame     = NULL;
2519                 frame.return_address = 0;
2520
2521                 if (!copy_stack_frame(fp, &frame))
2522                         break;
2523
2524                 if ((unsigned long)fp < regs->sp)
2525                         break;
2526
2527                 callchain_store(entry, frame.return_address);
2528                 fp = frame.next_frame;
2529         }
2530 }
2531
2532 static void
2533 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2534 {
2535         int is_user;
2536
2537         if (!regs)
2538                 return;
2539
2540         is_user = user_mode(regs);
2541
2542         if (!current || current->pid == 0)
2543                 return;
2544
2545         if (is_user && current->state != TASK_RUNNING)
2546                 return;
2547
2548         if (!is_user)
2549                 perf_callchain_kernel(regs, entry);
2550
2551         if (current->mm)
2552                 perf_callchain_user(regs, entry);
2553 }
2554
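     /*
      * Use separate per-CPU entry buffers for NMI and IRQ context so that an
      * NMI arriving while an IRQ-context callchain is being built does not
      * corrupt it.
      */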
2555 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2556 {
2557         struct perf_callchain_entry *entry;
2558
2559         if (in_nmi())
2560                 entry = &__get_cpu_var(pmc_nmi_entry);
2561         else
2562                 entry = &__get_cpu_var(pmc_irq_entry);
2563
2564         entry->nr = 0;
2565
2566         perf_do_callchain(regs, entry);
2567
2568         return entry;
2569 }
2570
2571 void hw_perf_event_setup_online(int cpu)
2572 {
2573         init_debug_store_on_cpu(cpu);
2574 }