perf_counter, x86: remove get_status() from struct x86_pmu
arch/x86/kernel/cpu/perf_counter.c
1 /*
2  * Performance counter x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8  *
9  *  For licensing details see kernel-base/COPYING
10  */
11
12 #include <linux/perf_counter.h>
13 #include <linux/capability.h>
14 #include <linux/notifier.h>
15 #include <linux/hardirq.h>
16 #include <linux/kprobes.h>
17 #include <linux/module.h>
18 #include <linux/kdebug.h>
19 #include <linux/sched.h>
20 #include <linux/uaccess.h>
21
22 #include <asm/apic.h>
23 #include <asm/stacktrace.h>
24 #include <asm/nmi.h>
25
26 static bool perf_counters_initialized __read_mostly;
27
28 /*
29  * Number of (generic) HW counters:
30  */
31 static int nr_counters_generic __read_mostly;
32 static u64 perf_counter_mask __read_mostly;
33 static u64 counter_value_mask __read_mostly;
34 static int counter_value_bits __read_mostly;
35
36 static int nr_counters_fixed __read_mostly;
37
38 struct cpu_hw_counters {
39         struct perf_counter     *counters[X86_PMC_IDX_MAX];
40         unsigned long           used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
41         unsigned long           interrupts;
42         u64                     throttle_ctrl;
43         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
44         int                     enabled;
45 };
46
47 /*
48  * struct x86_pmu - per-vendor PMU driver: MSR bases, event mappings, low-level enable/disable/irq callbacks
49  */
50 struct x86_pmu {
51         int             (*handle_irq)(struct pt_regs *, int);
52         u64             (*save_disable_all)(void);
53         void            (*restore_all)(u64);
54         void            (*ack_status)(u64);
55         void            (*enable)(int, u64);
56         void            (*disable)(int, u64);
57         unsigned        eventsel;
58         unsigned        perfctr;
59         u64             (*event_map)(int);
60         u64             (*raw_event)(u64);
61         int             max_events;
62 };
63
64 static struct x86_pmu *x86_pmu __read_mostly;
65
66 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
67         .enabled = 1,
68 };
69
70 static __read_mostly int intel_perfmon_version;
71
72 /*
73  * Intel PerfMon v3. Used on Core2 and later.
74  */
75 static const u64 intel_perfmon_event_map[] =
76 {
77   [PERF_COUNT_CPU_CYCLES]               = 0x003c,
78   [PERF_COUNT_INSTRUCTIONS]             = 0x00c0,
79   [PERF_COUNT_CACHE_REFERENCES]         = 0x4f2e,
80   [PERF_COUNT_CACHE_MISSES]             = 0x412e,
81   [PERF_COUNT_BRANCH_INSTRUCTIONS]      = 0x00c4,
82   [PERF_COUNT_BRANCH_MISSES]            = 0x00c5,
83   [PERF_COUNT_BUS_CYCLES]               = 0x013c,
84 };
85
86 static u64 intel_pmu_event_map(int event)
87 {
88         return intel_perfmon_event_map[event];
89 }
90
91 static u64 intel_pmu_raw_event(u64 event)
92 {
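        /*
         * Architectural EVENTSEL layout: event select in bits 0-7, unit mask
         * in bits 8-15, counter mask in bits 24-31. Only these fields may be
         * passed through from a raw event; the control bits stay under
         * kernel control:
         */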
93 #define CORE_EVNTSEL_EVENT_MASK         0x000000FFULL
94 #define CORE_EVNTSEL_UNIT_MASK          0x0000FF00ULL
95 #define CORE_EVNTSEL_COUNTER_MASK       0xFF000000ULL
96
97 #define CORE_EVNTSEL_MASK               \
98         (CORE_EVNTSEL_EVENT_MASK |      \
99          CORE_EVNTSEL_UNIT_MASK  |      \
100          CORE_EVNTSEL_COUNTER_MASK)
101
102         return event & CORE_EVNTSEL_MASK;
103 }
104
105 /*
106  * AMD Performance Monitor K7 and later.
107  */
108 static const u64 amd_perfmon_event_map[] =
109 {
110   [PERF_COUNT_CPU_CYCLES]               = 0x0076,
111   [PERF_COUNT_INSTRUCTIONS]             = 0x00c0,
112   [PERF_COUNT_CACHE_REFERENCES]         = 0x0080,
113   [PERF_COUNT_CACHE_MISSES]             = 0x0081,
114   [PERF_COUNT_BRANCH_INSTRUCTIONS]      = 0x00c4,
115   [PERF_COUNT_BRANCH_MISSES]            = 0x00c5,
116 };
117
118 static u64 amd_pmu_event_map(int event)
119 {
120         return amd_perfmon_event_map[event];
121 }
122
123 static u64 amd_pmu_raw_event(u64 event)
124 {
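        /*
         * K7 EVNTSEL layout: the low 8 event-select bits (plus the extended
         * event-select bits above bit 31), unit mask in bits 8-15, counter
         * mask in bits 24-31. Only these fields pass through from raw events:
         */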
125 #define K7_EVNTSEL_EVENT_MASK   0x7000000FFULL
126 #define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
127 #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
128
129 #define K7_EVNTSEL_MASK                 \
130         (K7_EVNTSEL_EVENT_MASK |        \
131          K7_EVNTSEL_UNIT_MASK  |        \
132          K7_EVNTSEL_COUNTER_MASK)
133
134         return event & K7_EVNTSEL_MASK;
135 }
136
137 /*
138  * Propagate counter elapsed time into the generic counter.
139  * Can only be executed on the CPU where the counter is active.
140  * The delta is added to counter->count and drained from hwc->period_left.
141  */
142 static void
143 x86_perf_counter_update(struct perf_counter *counter,
144                         struct hw_perf_counter *hwc, int idx)
145 {
146         u64 prev_raw_count, new_raw_count, delta;
147
148         /*
149          * Careful: an NMI might modify the previous counter value.
150          *
151          * Our tactic to handle this is to first atomically read and
152          * exchange a new raw count - then add that new-prev delta
153          * count to the generic counter atomically:
154          */
155 again:
156         prev_raw_count = atomic64_read(&hwc->prev_count);
157         rdmsrl(hwc->counter_base + idx, new_raw_count);
158
159         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
160                                         new_raw_count) != prev_raw_count)
161                 goto again;
162
163         /*
164          * Now we have the new raw value and have updated the prev
165          * timestamp already. We can now calculate the elapsed delta
166          * (counter-)time and add that to the generic counter.
167          *
168  * Careful: not all hw sign-extends above the physical width
169  * of the count, so we compensate by clipping the delta to 32 bits:
170          */
171         delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
172
173         atomic64_add(delta, &counter->count);
174         atomic64_sub(delta, &hwc->period_left);
175 }
176
177 static atomic_t num_counters;
178 static DEFINE_MUTEX(pmc_reserve_mutex);
179
180 static bool reserve_pmc_hardware(void)
181 {
182         int i;
183
184         if (nmi_watchdog == NMI_LOCAL_APIC)
185                 disable_lapic_nmi_watchdog();
186
187         for (i = 0; i < nr_counters_generic; i++) {
188                 if (!reserve_perfctr_nmi(x86_pmu->perfctr + i))
189                         goto perfctr_fail;
190         }
191
192         for (i = 0; i < nr_counters_generic; i++) {
193                 if (!reserve_evntsel_nmi(x86_pmu->eventsel + i))
194                         goto eventsel_fail;
195         }
196
197         return true;
198
199 eventsel_fail:
200         for (i--; i >= 0; i--)
201                 release_evntsel_nmi(x86_pmu->eventsel + i);
202
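        /*
         * All event-select MSRs are released; point i past the last counter
         * so that the fall-through below releases every counter MSR too:
         */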
203         i = nr_counters_generic;
204
205 perfctr_fail:
206         for (i--; i >= 0; i--)
207                 release_perfctr_nmi(x86_pmu->perfctr + i);
208
209         if (nmi_watchdog == NMI_LOCAL_APIC)
210                 enable_lapic_nmi_watchdog();
211
212         return false;
213 }
214
215 static void release_pmc_hardware(void)
216 {
217         int i;
218
219         for (i = 0; i < nr_counters_generic; i++) {
220                 release_perfctr_nmi(x86_pmu->perfctr + i);
221                 release_evntsel_nmi(x86_pmu->eventsel + i);
222         }
223
224         if (nmi_watchdog == NMI_LOCAL_APIC)
225                 enable_lapic_nmi_watchdog();
226 }
227
228 static void hw_perf_counter_destroy(struct perf_counter *counter)
229 {
230         if (atomic_dec_and_mutex_lock(&num_counters, &pmc_reserve_mutex)) {
231                 release_pmc_hardware();
232                 mutex_unlock(&pmc_reserve_mutex);
233         }
234 }
235
236 /*
237  * Set up the hardware configuration for a given hw_event_type
238  */
239 static int __hw_perf_counter_init(struct perf_counter *counter)
240 {
241         struct perf_counter_hw_event *hw_event = &counter->hw_event;
242         struct hw_perf_counter *hwc = &counter->hw;
243         int err;
244
245         /* disable temporarily */
246         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
247                 return -ENOSYS;
248
249         if (unlikely(!perf_counters_initialized))
250                 return -EINVAL;
251
252         err = 0;
253         if (!atomic_inc_not_zero(&num_counters)) {
254                 mutex_lock(&pmc_reserve_mutex);
255                 if (atomic_read(&num_counters) == 0 && !reserve_pmc_hardware())
256                         err = -EBUSY;
257                 else
258                         atomic_inc(&num_counters);
259                 mutex_unlock(&pmc_reserve_mutex);
260         }
261         if (err)
262                 return err;
263
264         /*
265          * Generate PMC IRQs:
266          * (keep 'enabled' bit clear for now)
267          */
268         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
269
270         /*
271          * Count user and OS events unless requested not to.
272          */
273         if (!hw_event->exclude_user)
274                 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
275         if (!hw_event->exclude_kernel)
276                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
277
278         /*
279          * If privileged enough, allow NMI events:
280          */
281         hwc->nmi = 0;
282         if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
283                 hwc->nmi = 1;
284
285         hwc->irq_period         = hw_event->irq_period;
286         /*
287          * Intel PMCs cannot be accessed sanely above 32 bit width,
288          * so we install an artificial 1<<31 period regardless of
289          * the generic counter period:
290          */
291         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
292                 if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
293                         hwc->irq_period = 0x7FFFFFFF;
294
295         atomic64_set(&hwc->period_left, hwc->irq_period);
296
297         /*
298          * Raw events provide the hardware config directly in the event structure:
299          */
300         if (perf_event_raw(hw_event)) {
301                 hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event));
302         } else {
303                 if (perf_event_id(hw_event) >= x86_pmu->max_events)
304                         return -EINVAL;
305                 /*
306                  * The generic map:
307                  */
308                 hwc->config |= x86_pmu->event_map(perf_event_id(hw_event));
309         }
310
311         counter->destroy = hw_perf_counter_destroy;
312
313         return 0;
314 }
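
/*
 * Worked example (illustration only): a cycle counter that counts both user
 * and kernel mode and uses a regular interrupt ends up with
 *
 *	hwc->config = 0x003c			 UNHALTED_CORE_CYCLES
 *		    | ARCH_PERFMON_EVENTSEL_USR	 (bit 16)
 *		    | ARCH_PERFMON_EVENTSEL_OS	 (bit 17)
 *		    | ARCH_PERFMON_EVENTSEL_INT	 (bit 20)
 *		    = 0x0013003c
 *
 * The ENABLE bit (bit 22) is only ORed in later, by *_pmu_enable_counter().
 */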
315
316 static u64 intel_pmu_save_disable_all(void)
317 {
318         u64 ctrl;
319
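        /*
         * Clearing GLOBAL_CTRL stops all generic and fixed counters at once;
         * the old value is returned so that hw_perf_restore() can undo this:
         */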
320         rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
321         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
322
323         return ctrl;
324 }
325
326 static u64 amd_pmu_save_disable_all(void)
327 {
328         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
329         int enabled, idx;
330
331         enabled = cpuc->enabled;
332         cpuc->enabled = 0;
333         /*
334          * ensure we write the disable before we start disabling the
335          * counters proper, so that amd_pmu_enable_counter() does the
336          * right thing.
337          */
338         barrier();
339
340         for (idx = 0; idx < nr_counters_generic; idx++) {
341                 u64 val;
342
343                 if (!test_bit(idx, cpuc->active_mask))
344                         continue;
345                 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
346                 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
347                         continue;
348                 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
349                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
350         }
351
352         return enabled;
353 }
354
355 u64 hw_perf_save_disable(void)
356 {
357         if (unlikely(!perf_counters_initialized))
358                 return 0;
359
360         return x86_pmu->save_disable_all();
361 }
362 /*
363  * Exported because of ACPI idle
364  */
365 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
366
367 static void intel_pmu_restore_all(u64 ctrl)
368 {
369         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
370 }
371
372 static void amd_pmu_restore_all(u64 ctrl)
373 {
374         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
375         int idx;
376
377         cpuc->enabled = ctrl;
378         barrier();
379         if (!ctrl)
380                 return;
381
382         for (idx = 0; idx < nr_counters_generic; idx++) {
383                 u64 val;
384
385                 if (!test_bit(idx, cpuc->active_mask))
386                         continue;
387                 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
388                 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
389                         continue;
390                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
391                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
392         }
393 }
394
395 void hw_perf_restore(u64 ctrl)
396 {
397         if (unlikely(!perf_counters_initialized))
398                 return;
399
400         x86_pmu->restore_all(ctrl);
401 }
402 /*
403  * Exported because of ACPI idle
404  */
405 EXPORT_SYMBOL_GPL(hw_perf_restore);
406
407 static inline u64 intel_pmu_get_status(u64 mask)
408 {
409         u64 status;
410
411         if (unlikely(!perf_counters_initialized))
412                 return 0;
413         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
414
415         return status;
416 }
417
418 static void intel_pmu_ack_status(u64 ack)
419 {
420         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
421 }
422
423 static void amd_pmu_ack_status(u64 ack)
424 {
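        /*
         * AMD has no global overflow status/ack MSR, so there is nothing
         * to acknowledge:
         */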
425 }
426
427 static void hw_perf_ack_status(u64 ack)
428 {
429         if (unlikely(!perf_counters_initialized))
430                 return;
431
432         x86_pmu->ack_status(ack);
433 }
434
435 static void intel_pmu_enable_counter(int idx, u64 config)
436 {
437         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
438                         config | ARCH_PERFMON_EVENTSEL0_ENABLE);
439 }
440
441 static void amd_pmu_enable_counter(int idx, u64 config)
442 {
443         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
444
445         set_bit(idx, cpuc->active_mask);
446         if (cpuc->enabled)
447                 config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
448
449         wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
450 }
451
452 static void hw_perf_enable(int idx, u64 config)
453 {
454         if (unlikely(!perf_counters_initialized))
455                 return;
456
457         x86_pmu->enable(idx, config);
458 }
459
460 static void intel_pmu_disable_counter(int idx, u64 config)
461 {
462         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
463 }
464
465 static void amd_pmu_disable_counter(int idx, u64 config)
466 {
467         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
468
469         clear_bit(idx, cpuc->active_mask);
470         wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
471
472 }
473
474 static void hw_perf_disable(int idx, u64 config)
475 {
476         if (unlikely(!perf_counters_initialized))
477                 return;
478
479         x86_pmu->disable(idx, config);
480 }
481
482 static inline void
483 __pmc_fixed_disable(struct perf_counter *counter,
484                     struct hw_perf_counter *hwc, unsigned int __idx)
485 {
486         int idx = __idx - X86_PMC_IDX_FIXED;
487         u64 ctrl_val, mask;
488         int err;
489
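        /*
         * Each fixed counter owns a 4-bit control field in FIXED_CTR_CTRL;
         * clearing the field stops that counter:
         */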
490         mask = 0xfULL << (idx * 4);
491
492         rdmsrl(hwc->config_base, ctrl_val);
493         ctrl_val &= ~mask;
494         err = checking_wrmsrl(hwc->config_base, ctrl_val);
495 }
496
497 static inline void
498 __x86_pmu_disable(struct perf_counter *counter,
499                   struct hw_perf_counter *hwc, unsigned int idx)
500 {
501         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
502                 __pmc_fixed_disable(counter, hwc, idx);
503         else
504                 hw_perf_disable(idx, hwc->config);
505 }
506
507 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
508
509 /*
510  * Set the next IRQ period, based on the hwc->period_left value.
511  * To be called with the counter disabled in hw:
512  */
513 static void
514 __hw_perf_counter_set_period(struct perf_counter *counter,
515                              struct hw_perf_counter *hwc, int idx)
516 {
517         s64 left = atomic64_read(&hwc->period_left);
518         s64 period = hwc->irq_period;
519         int err;
520
521         /*
522          * If we are way outside a reasonable range then just skip forward:
523          */
524         if (unlikely(left <= -period)) {
525                 left = period;
526                 atomic64_set(&hwc->period_left, left);
527         }
528
529         if (unlikely(left <= 0)) {
530                 left += period;
531                 atomic64_set(&hwc->period_left, left);
532         }
533
534         per_cpu(prev_left[idx], smp_processor_id()) = left;
535
536         /*
537          * The hw counter starts counting from this counter offset;
538          * mark it so that we can extract future deltas:
539          */
540         atomic64_set(&hwc->prev_count, (u64)-left);
541
542         err = checking_wrmsrl(hwc->counter_base + idx,
543                              (u64)(-left) & counter_value_mask);
544 }
545
546 static inline void
547 __pmc_fixed_enable(struct perf_counter *counter,
548                    struct hw_perf_counter *hwc, unsigned int __idx)
549 {
550         int idx = __idx - X86_PMC_IDX_FIXED;
551         u64 ctrl_val, bits, mask;
552         int err;
553
554         /*
555          * Enable IRQ generation (0x8),
556          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
557          * if requested:
558          */
559         bits = 0x8ULL;
560         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
561                 bits |= 0x2;
562         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
563                 bits |= 0x1;
564         bits <<= (idx * 4);
565         mask = 0xfULL << (idx * 4);
566
567         rdmsrl(hwc->config_base, ctrl_val);
568         ctrl_val &= ~mask;
569         ctrl_val |= bits;
570         err = checking_wrmsrl(hwc->config_base, ctrl_val);
571 }
572
573 static void
574 __x86_pmu_enable(struct perf_counter *counter,
575                  struct hw_perf_counter *hwc, int idx)
576 {
577         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
578                 __pmc_fixed_enable(counter, hwc, idx);
579         else
580                 hw_perf_enable(idx, hwc->config);
581 }
582
583 static int
584 fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
585 {
586         unsigned int event;
587
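        /*
         * Fixed-purpose counters exist only on Intel, and counters that use
         * NMIs always fall back to a generic counter:
         */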
588         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
589                 return -1;
590
591         if (unlikely(hwc->nmi))
592                 return -1;
593
594         event = hwc->config & ARCH_PERFMON_EVENT_MASK;
595
596         if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS)))
597                 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
598         if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES)))
599                 return X86_PMC_IDX_FIXED_CPU_CYCLES;
600         if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES)))
601                 return X86_PMC_IDX_FIXED_BUS_CYCLES;
602
603         return -1;
604 }
605
606 /*
607  * Find a PMC slot for the freshly enabled / scheduled in counter:
608  */
609 static int x86_pmu_enable(struct perf_counter *counter)
610 {
611         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
612         struct hw_perf_counter *hwc = &counter->hw;
613         int idx;
614
615         idx = fixed_mode_idx(counter, hwc);
616         if (idx >= 0) {
617                 /*
618                  * Try to get the fixed counter, if that is already taken
619                  * then try to get a generic counter:
620                  */
621                 if (test_and_set_bit(idx, cpuc->used))
622                         goto try_generic;
623
624                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
625                 /*
626                  * We set it so that counter_base + idx in wrmsr/rdmsr maps to
627                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
628                  */
629                 hwc->counter_base =
630                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
631                 hwc->idx = idx;
632         } else {
633                 idx = hwc->idx;
634                 /* Try to get the previous generic counter again */
635                 if (test_and_set_bit(idx, cpuc->used)) {
636 try_generic:
637                         idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
638                         if (idx == nr_counters_generic)
639                                 return -EAGAIN;
640
641                         set_bit(idx, cpuc->used);
642                         hwc->idx = idx;
643                 }
644                 hwc->config_base  = x86_pmu->eventsel;
645                 hwc->counter_base = x86_pmu->perfctr;
646         }
647
648         perf_counters_lapic_init(hwc->nmi);
649
650         __x86_pmu_disable(counter, hwc, idx);
651
652         cpuc->counters[idx] = counter;
653         /*
654          * Make it visible before enabling the hw:
655          */
656         barrier();
657
658         __hw_perf_counter_set_period(counter, hwc, idx);
659         __x86_pmu_enable(counter, hwc, idx);
660
661         return 0;
662 }
663
664 void perf_counter_print_debug(void)
665 {
666         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
667         struct cpu_hw_counters *cpuc;
668         int cpu, idx;
669
670         if (!nr_counters_generic)
671                 return;
672
673         local_irq_disable();
674
675         cpu = smp_processor_id();
676         cpuc = &per_cpu(cpu_hw_counters, cpu);
677
678         if (intel_perfmon_version >= 2) {
679                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
680                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
681                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
682                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
683
684                 pr_info("\n");
685                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
686                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
687                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
688                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
689         }
690         pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
691
692         for (idx = 0; idx < nr_counters_generic; idx++) {
693                 rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl);
694                 rdmsrl(x86_pmu->perfctr  + idx, pmc_count);
695
696                 prev_left = per_cpu(prev_left[idx], cpu);
697
698                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
699                         cpu, idx, pmc_ctrl);
700                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
701                         cpu, idx, pmc_count);
702                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
703                         cpu, idx, prev_left);
704         }
705         for (idx = 0; idx < nr_counters_fixed; idx++) {
706                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
707
708                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
709                         cpu, idx, pmc_count);
710         }
711         local_irq_enable();
712 }
713
714 static void x86_pmu_disable(struct perf_counter *counter)
715 {
716         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
717         struct hw_perf_counter *hwc = &counter->hw;
718         unsigned int idx = hwc->idx;
719
720         __x86_pmu_disable(counter, hwc, idx);
721
722         clear_bit(idx, cpuc->used);
723         cpuc->counters[idx] = NULL;
724         /*
725          * Make sure the cleared pointer becomes visible before we
726          * (potentially) free the counter:
727          */
728         barrier();
729
730         /*
731          * Drain the remaining delta count out of a counter
732          * that we are disabling:
733          */
734         x86_perf_counter_update(counter, hwc, idx);
735 }
736
737 /*
738  * Save and restart an expired counter. Called by NMI contexts,
739  * so it has to be careful about preempting normal counter ops:
740  */
741 static void perf_save_and_restart(struct perf_counter *counter)
742 {
743         struct hw_perf_counter *hwc = &counter->hw;
744         int idx = hwc->idx;
745
746         x86_perf_counter_update(counter, hwc, idx);
747         __hw_perf_counter_set_period(counter, hwc, idx);
748
749         if (counter->state == PERF_COUNTER_STATE_ACTIVE)
750                 __x86_pmu_enable(counter, hwc, idx);
751 }
752
753 /*
754  * Maximum interrupt frequency of 100KHz per CPU
755  */
756 #define PERFMON_MAX_INTERRUPTS (100000/HZ)
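
/*
 * cpuc->interrupts is checked against this budget by the IRQ handler and
 * reset again by perf_counter_unthrottle():
 */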
757
758 /*
759  * This handler is triggered by the local APIC, so the APIC IRQ handling
760  * rules apply:
761  */
762 static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
763 {
764         int bit, cpu = smp_processor_id();
765         u64 ack, status;
766         struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
767         int ret = 0;
768
769         cpuc->throttle_ctrl = hw_perf_save_disable();
770
771         status = intel_pmu_get_status(cpuc->throttle_ctrl);
772         if (!status)
773                 goto out;
774
775         ret = 1;
776 again:
777         inc_irq_stat(apic_perf_irqs);
778         ack = status;
779         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
780                 struct perf_counter *counter = cpuc->counters[bit];
781
782                 clear_bit(bit, (unsigned long *) &status);
783                 if (!counter)
784                         continue;
785
786                 perf_save_and_restart(counter);
787                 if (perf_counter_overflow(counter, nmi, regs, 0))
788                         __x86_pmu_disable(counter, &counter->hw, bit);
789         }
790
791         hw_perf_ack_status(ack);
792
793         /*
794          * Repeat if there is more work to be done:
795          */
796         status = intel_pmu_get_status(cpuc->throttle_ctrl);
797         if (status)
798                 goto again;
799 out:
800         /*
801          * Restore - do not reenable when global enable is off or throttled:
802          */
803         if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
804                 hw_perf_restore(cpuc->throttle_ctrl);
805
806         return ret;
807 }
808
809 static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; }
810
811 void perf_counter_unthrottle(void)
812 {
813         struct cpu_hw_counters *cpuc;
814
815         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
816                 return;
817
818         if (unlikely(!perf_counters_initialized))
819                 return;
820
821         cpuc = &__get_cpu_var(cpu_hw_counters);
822         if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
823                 if (printk_ratelimit())
824                         printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
825                 hw_perf_restore(cpuc->throttle_ctrl);
826         }
827         cpuc->interrupts = 0;
828 }
829
830 void smp_perf_counter_interrupt(struct pt_regs *regs)
831 {
832         irq_enter();
833         apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
834         ack_APIC_irq();
835         x86_pmu->handle_irq(regs, 0);
836         irq_exit();
837 }
838
839 void smp_perf_pending_interrupt(struct pt_regs *regs)
840 {
841         irq_enter();
842         ack_APIC_irq();
843         inc_irq_stat(apic_pending_irqs);
844         perf_counter_do_pending();
845         irq_exit();
846 }
847
848 void set_perf_counter_pending(void)
849 {
850         apic->send_IPI_self(LOCAL_PENDING_VECTOR);
851 }
852
853 void perf_counters_lapic_init(int nmi)
854 {
855         u32 apic_val;
856
857         if (!perf_counters_initialized)
858                 return;
859         /*
860          * Enable the performance counter vector in the APIC LVT:
861          */
862         apic_val = apic_read(APIC_LVTERR);
863
864         apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
865         if (nmi)
866                 apic_write(APIC_LVTPC, APIC_DM_NMI);
867         else
868                 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
869         apic_write(APIC_LVTERR, apic_val);
870 }
871
872 static int __kprobes
873 perf_counter_nmi_handler(struct notifier_block *self,
874                          unsigned long cmd, void *__args)
875 {
876         struct die_args *args = __args;
877         struct pt_regs *regs;
878         int ret;
879
880         switch (cmd) {
881         case DIE_NMI:
882         case DIE_NMI_IPI:
883                 break;
884
885         default:
886                 return NOTIFY_DONE;
887         }
888
889         regs = args->regs;
890
891         apic_write(APIC_LVTPC, APIC_DM_NMI);
892         ret = x86_pmu->handle_irq(regs, 1);
893
894         return ret ? NOTIFY_STOP : NOTIFY_OK;
895 }
896
897 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
898         .notifier_call          = perf_counter_nmi_handler,
899         .next                   = NULL,
900         .priority               = 1
901 };
902
903 static struct x86_pmu intel_pmu = {
904         .handle_irq             = intel_pmu_handle_irq,
905         .save_disable_all       = intel_pmu_save_disable_all,
906         .restore_all            = intel_pmu_restore_all,
907         .ack_status             = intel_pmu_ack_status,
908         .enable                 = intel_pmu_enable_counter,
909         .disable                = intel_pmu_disable_counter,
910         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
911         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
912         .event_map              = intel_pmu_event_map,
913         .raw_event              = intel_pmu_raw_event,
914         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
915 };
916
917 static struct x86_pmu amd_pmu = {
918         .handle_irq             = amd_pmu_handle_irq,
919         .save_disable_all       = amd_pmu_save_disable_all,
920         .restore_all            = amd_pmu_restore_all,
921         .ack_status             = amd_pmu_ack_status,
922         .enable                 = amd_pmu_enable_counter,
923         .disable                = amd_pmu_disable_counter,
924         .eventsel               = MSR_K7_EVNTSEL0,
925         .perfctr                = MSR_K7_PERFCTR0,
926         .event_map              = amd_pmu_event_map,
927         .raw_event              = amd_pmu_raw_event,
928         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
929 };
930
931 static struct x86_pmu *intel_pmu_init(void)
932 {
933         union cpuid10_edx edx;
934         union cpuid10_eax eax;
935         unsigned int unused;
936         unsigned int ebx;
937
938         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
939                 return NULL;
940
941         /*
942          * Check whether the Architectural PerfMon supports
943          * Branch Misses Retired Event or not.
944          */
945         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
946         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
947                 return NULL;
948
949         intel_perfmon_version = eax.split.version_id;
950         if (intel_perfmon_version < 2)
951                 return NULL;
952
953         pr_info("Intel Performance Monitoring support detected.\n");
954         pr_info("... version:         %d\n", intel_perfmon_version);
955         pr_info("... bit width:       %d\n", eax.split.bit_width);
956         pr_info("... mask length:     %d\n", eax.split.mask_length);
957
958         nr_counters_generic = eax.split.num_counters;
959         nr_counters_fixed = edx.split.num_counters_fixed;
960         counter_value_mask = (1ULL << eax.split.bit_width) - 1;
961
962         return &intel_pmu;
963 }
964
965 static struct x86_pmu *amd_pmu_init(void)
966 {
967         nr_counters_generic = 4;
968         nr_counters_fixed = 0;
969         counter_value_mask = 0x0000FFFFFFFFFFFFULL;
970         counter_value_bits = 48;
971
972         pr_info("AMD Performance Monitoring support detected.\n");
973
974         return &amd_pmu;
975 }
976
977 void __init init_hw_perf_counters(void)
978 {
979         switch (boot_cpu_data.x86_vendor) {
980         case X86_VENDOR_INTEL:
981                 x86_pmu = intel_pmu_init();
982                 break;
983         case X86_VENDOR_AMD:
984                 x86_pmu = amd_pmu_init();
985                 break;
986         default:
987                 return;
988         }
989         if (!x86_pmu)
990                 return;
991
992         pr_info("... num counters:    %d\n", nr_counters_generic);
993         if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
994                 nr_counters_generic = X86_PMC_MAX_GENERIC;
995                 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
996                         nr_counters_generic, X86_PMC_MAX_GENERIC);
997         }
998         perf_counter_mask = (1 << nr_counters_generic) - 1;
999         perf_max_counters = nr_counters_generic;
1000
1001         pr_info("... value mask:      %016Lx\n", counter_value_mask);
1002
1003         if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
1004                 nr_counters_fixed = X86_PMC_MAX_FIXED;
1005                 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1006                         nr_counters_fixed, X86_PMC_MAX_FIXED);
1007         }
1008         pr_info("... fixed counters:  %d\n", nr_counters_fixed);
1009
1010         perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1011
1012         pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
1013         perf_counters_initialized = true;
1014
1015         perf_counters_lapic_init(0);
1016         register_die_notifier(&perf_counter_nmi_notifier);
1017 }
1018
1019 static void x86_pmu_read(struct perf_counter *counter)
1020 {
1021         x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1022 }
1023
1024 static const struct pmu pmu = {
1025         .enable         = x86_pmu_enable,
1026         .disable        = x86_pmu_disable,
1027         .read           = x86_pmu_read,
1028 };
1029
1030 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1031 {
1032         int err;
1033
1034         err = __hw_perf_counter_init(counter);
1035         if (err)
1036                 return ERR_PTR(err);
1037
1038         return &pmu;
1039 }
1040
1041 /*
1042  * callchain support
1043  */
1044
1045 static inline
1046 void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
1047 {
1048         if (entry->nr < MAX_STACK_DEPTH)
1049                 entry->ip[entry->nr++] = ip;
1050 }
1051
1052 static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1053 static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1054
1055
1056 static void
1057 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1058 {
1059         /* Ignore warnings */
1060 }
1061
1062 static void backtrace_warning(void *data, char *msg)
1063 {
1064         /* Ignore warnings */
1065 }
1066
1067 static int backtrace_stack(void *data, char *name)
1068 {
1069         /* Don't bother with IRQ stacks for now */
1070         return -1;
1071 }
1072
1073 static void backtrace_address(void *data, unsigned long addr, int reliable)
1074 {
1075         struct perf_callchain_entry *entry = data;
1076
1077         if (reliable)
1078                 callchain_store(entry, addr);
1079 }
1080
1081 static const struct stacktrace_ops backtrace_ops = {
1082         .warning                = backtrace_warning,
1083         .warning_symbol         = backtrace_warning_symbol,
1084         .stack                  = backtrace_stack,
1085         .address                = backtrace_address,
1086 };
1087
1088 static void
1089 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1090 {
1091         unsigned long bp;
1092         char *stack;
1093         int nr = entry->nr;
1094
1095         callchain_store(entry, instruction_pointer(regs));
1096
1097         stack = ((char *)regs + sizeof(struct pt_regs));
1098 #ifdef CONFIG_FRAME_POINTER
1099         bp = frame_pointer(regs);
1100 #else
1101         bp = 0;
1102 #endif
1103
1104         dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
1105
1106         entry->kernel = entry->nr - nr;
1107 }
1108
1109
1110 struct stack_frame {
1111         const void __user       *next_fp;
1112         unsigned long           return_address;
1113 };
1114
1115 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1116 {
1117         int ret;
1118
1119         if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
1120                 return 0;
1121
1122         ret = 1;
1123         pagefault_disable();
1124         if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
1125                 ret = 0;
1126         pagefault_enable();
1127
1128         return ret;
1129 }
1130
1131 static void
1132 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1133 {
1134         struct stack_frame frame;
1135         const void __user *fp;
1136         int nr = entry->nr;
1137
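        /*
         * Use the register frame saved at the top of the task's kernel
         * stack: it holds the user-mode registers, unlike the (possibly
         * kernel-mode) regs we were handed:
         */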
1138         regs = (struct pt_regs *)current->thread.sp0 - 1;
1139         fp   = (void __user *)regs->bp;
1140
1141         callchain_store(entry, regs->ip);
1142
1143         while (entry->nr < MAX_STACK_DEPTH) {
1144                 frame.next_fp        = NULL;
1145                 frame.return_address = 0;
1146
1147                 if (!copy_stack_frame(fp, &frame))
1148                         break;
1149
1150                 if ((unsigned long)fp < user_stack_pointer(regs))
1151                         break;
1152
1153                 callchain_store(entry, frame.return_address);
1154                 fp = frame.next_fp;
1155         }
1156
1157         entry->user = entry->nr - nr;
1158 }
1159
1160 static void
1161 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1162 {
1163         int is_user;
1164
1165         if (!regs)
1166                 return;
1167
1168         is_user = user_mode(regs);
1169
1170         if (!current || current->pid == 0)
1171                 return;
1172
1173         if (is_user && current->state != TASK_RUNNING)
1174                 return;
1175
1176         if (!is_user)
1177                 perf_callchain_kernel(regs, entry);
1178
1179         if (current->mm)
1180                 perf_callchain_user(regs, entry);
1181 }
1182
1183 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1184 {
1185         struct perf_callchain_entry *entry;
1186
1187         if (in_nmi())
1188                 entry = &__get_cpu_var(nmi_entry);
1189         else
1190                 entry = &__get_cpu_var(irq_entry);
1191
1192         entry->nr = 0;
1193         entry->hv = 0;
1194         entry->kernel = 0;
1195         entry->user = 0;
1196
1197         perf_do_callchain(regs, entry);
1198
1199         return entry;
1200 }