arch/arm/kernel/perf_event.c (mv-sheeva.git blob, at commit "perf: Reduce perf_disable() usage")
1 #undef DEBUG
2
3 /*
4  * ARM performance counter support.
5  *
6  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7  *
8  * ARMv7 support: Jean Pihet <jpihet@mvista.com>
9  * 2010 (c) MontaVista Software, LLC.
10  *
11  * This code is based on the sparc64 perf event code, which is in turn based
12  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
13  * code.
14  */
15 #define pr_fmt(fmt) "hw perfevents: " fmt
16
17 #include <linux/interrupt.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/perf_event.h>
21 #include <linux/platform_device.h>
22 #include <linux/spinlock.h>
23 #include <linux/uaccess.h>
24
25 #include <asm/cputype.h>
26 #include <asm/irq.h>
27 #include <asm/irq_regs.h>
28 #include <asm/pmu.h>
29 #include <asm/stacktrace.h>
30
31 static struct platform_device *pmu_device;
32
33 /*
34  * Hardware lock to serialize accesses to PMU registers. Needed for the
35  * read/modify/write sequences.
36  */
37 DEFINE_SPINLOCK(pmu_lock);
38
39 /*
40  * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
41  * another platform that supports more, we need to increase this to be the
42  * largest of all platforms.
43  *
44  * ARMv7 supports up to 32 events:
45  *  cycle counter CCNT + 31 events counters CNT0..30.
46  *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
47  */
48 #define ARMPMU_MAX_HWEVENTS             33
49
50 /* The events for a given CPU. */
51 struct cpu_hw_events {
52         /*
53          * The events that are active on the CPU for the given index. Index 0
54          * is reserved.
55          */
56         struct perf_event       *events[ARMPMU_MAX_HWEVENTS];
57
58         /*
59          * A 1 bit for an index indicates that the counter is being used for
60          * an event. A 0 means that the counter can be used.
61          */
62         unsigned long           used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
63
64         /*
65          * A 1 bit for an index indicates that the counter is actively being
66          * used.
67          */
68         unsigned long           active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
69 };
70 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
71
72 /* PMU names. */
73 static const char *arm_pmu_names[] = {
74         [ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
75         [ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
76         [ARM_PERF_PMU_ID_V6]      = "v6",
77         [ARM_PERF_PMU_ID_V6MP]    = "v6mpcore",
78         [ARM_PERF_PMU_ID_CA8]     = "ARMv7 Cortex-A8",
79         [ARM_PERF_PMU_ID_CA9]     = "ARMv7 Cortex-A9",
80 };
81
82 struct arm_pmu {
83         enum arm_perf_pmu_ids id;
84         irqreturn_t     (*handle_irq)(int irq_num, void *dev);
85         void            (*enable)(struct hw_perf_event *evt, int idx);
86         void            (*disable)(struct hw_perf_event *evt, int idx);
87         int             (*event_map)(int evt);
88         u64             (*raw_event)(u64);
89         int             (*get_event_idx)(struct cpu_hw_events *cpuc,
90                                          struct hw_perf_event *hwc);
91         u32             (*read_counter)(int idx);
92         void            (*write_counter)(int idx, u32 val);
93         void            (*start)(void);
94         void            (*stop)(void);
95         int             num_events;
96         u64             max_period;
97 };
98
99 /* Set at runtime when we know what CPU type we are. */
100 static const struct arm_pmu *armpmu;
101
102 enum arm_perf_pmu_ids
103 armpmu_get_pmu_id(void)
104 {
105         int id = -ENODEV;
106
107         if (armpmu != NULL)
108                 id = armpmu->id;
109
110         return id;
111 }
112 EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);
113
114 int
115 armpmu_get_max_events(void)
116 {
117         int max_events = 0;
118
119         if (armpmu != NULL)
120                 max_events = armpmu->num_events;
121
122         return max_events;
123 }
124 EXPORT_SYMBOL_GPL(armpmu_get_max_events);
125
126 #define HW_OP_UNSUPPORTED               0xFFFF
127
128 #define C(_x) \
129         PERF_COUNT_HW_CACHE_##_x
130
131 #define CACHE_OP_UNSUPPORTED            0xFFFF
132
133 static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
134                                      [PERF_COUNT_HW_CACHE_OP_MAX]
135                                      [PERF_COUNT_HW_CACHE_RESULT_MAX];
136
137 static int
138 armpmu_map_cache_event(u64 config)
139 {
140         unsigned int cache_type, cache_op, cache_result, ret;
141
142         cache_type = (config >>  0) & 0xff;
143         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
144                 return -EINVAL;
145
146         cache_op = (config >>  8) & 0xff;
147         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
148                 return -EINVAL;
149
150         cache_result = (config >> 16) & 0xff;
151         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
152                 return -EINVAL;
153
154         ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
155
156         if (ret == CACHE_OP_UNSUPPORTED)
157                 return -ENOENT;
158
159         return ret;
160 }
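/*
 * Illustrative sketch (not part of the original driver): how a
 * PERF_TYPE_HW_CACHE config value is laid out before it reaches
 * armpmu_map_cache_event() above. The encoding is the generic perf ABI
 * one: bits 0-7 select the cache, bits 8-15 the operation and bits 16-23
 * the result. An L1 data-cache read miss, for instance, would be built
 * roughly as:
 *
 *      u64 config = PERF_COUNT_HW_CACHE_L1D |
 *                   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *                   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 *
 * and resolves through armpmu_perf_cache_map (filled in from the
 * CPU-specific tables below at init time) to, on ARMv6,
 * ARMV6_PERFCTR_DCACHE_MISS.
 */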
161
162 static int
163 armpmu_event_set_period(struct perf_event *event,
164                         struct hw_perf_event *hwc,
165                         int idx)
166 {
167         s64 left = local64_read(&hwc->period_left);
168         s64 period = hwc->sample_period;
169         int ret = 0;
170
171         if (unlikely(left <= -period)) {
172                 left = period;
173                 local64_set(&hwc->period_left, left);
174                 hwc->last_period = period;
175                 ret = 1;
176         }
177
178         if (unlikely(left <= 0)) {
179                 left += period;
180                 local64_set(&hwc->period_left, left);
181                 hwc->last_period = period;
182                 ret = 1;
183         }
184
185         if (left > (s64)armpmu->max_period)
186                 left = armpmu->max_period;
187
188         local64_set(&hwc->prev_count, (u64)-left);
189
190         armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
191
192         perf_event_update_userpage(event);
193
194         return ret;
195 }
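/*
 * Worked example (illustrative only, not from the original source): with
 * max_period = 0xffffffff and a sample_period of 1000, the code above
 * leaves left = 1000 and programs the counter with
 * (u64)(-left) & 0xffffffff == 0xfffffc18. The 32-bit counter therefore
 * overflows, and raises the PMU interrupt, after exactly 1000 events, at
 * which point the interrupt handler calls back in here to arm the next
 * period.
 */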
196
197 static u64
198 armpmu_event_update(struct perf_event *event,
199                     struct hw_perf_event *hwc,
200                     int idx)
201 {
202         int shift = 64 - 32;
203         s64 prev_raw_count, new_raw_count;
204         u64 delta;
205
206 again:
207         prev_raw_count = local64_read(&hwc->prev_count);
208         new_raw_count = armpmu->read_counter(idx);
209
210         if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
211                              new_raw_count) != prev_raw_count)
212                 goto again;
213
214         delta = (new_raw_count << shift) - (prev_raw_count << shift);
215         delta >>= shift;
216
217         local64_add(delta, &event->count);
218         local64_sub(delta, &hwc->period_left);
219
220         return new_raw_count;
221 }
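/*
 * Note on the shift arithmetic above (added for clarity): the hardware
 * counters are only 32 bits wide, so both raw values are shifted up by
 * 32 bits before the subtraction and the delta is shifted back down
 * afterwards. The subtraction then wraps modulo 2^32, which keeps the
 * delta correct across a counter overflow. For example,
 * prev = 0xfffffff0 and new = 0x00000010 yield delta = 0x20 rather than
 * a huge bogus value.
 */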
222
223 static void
224 armpmu_disable(struct perf_event *event)
225 {
226         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
227         struct hw_perf_event *hwc = &event->hw;
228         int idx = hwc->idx;
229
230         WARN_ON(idx < 0);
231
232         clear_bit(idx, cpuc->active_mask);
233         armpmu->disable(hwc, idx);
234
235         barrier();
236
237         armpmu_event_update(event, hwc, idx);
238         cpuc->events[idx] = NULL;
239         clear_bit(idx, cpuc->used_mask);
240
241         perf_event_update_userpage(event);
242 }
243
244 static void
245 armpmu_read(struct perf_event *event)
246 {
247         struct hw_perf_event *hwc = &event->hw;
248
249         /* Don't read disabled counters! */
250         if (hwc->idx < 0)
251                 return;
252
253         armpmu_event_update(event, hwc, hwc->idx);
254 }
255
256 static void
257 armpmu_unthrottle(struct perf_event *event)
258 {
259         struct hw_perf_event *hwc = &event->hw;
260
261         /*
262          * Set the period again. Some counters can't be stopped, so when we
263          * were throttled we simply disabled the IRQ source and the counter
264          * may have been left counting. If we don't do this step then we may
265          * get an interrupt too soon or *way* too late if the overflow has
266          * happened since disabling.
267          */
268         armpmu_event_set_period(event, hwc, hwc->idx);
269         armpmu->enable(hwc, hwc->idx);
270 }
271
272 static int
273 armpmu_enable(struct perf_event *event)
274 {
275         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
276         struct hw_perf_event *hwc = &event->hw;
277         int idx;
278         int err = 0;
279
280         perf_disable();
281
282         /* If we don't have space for the counter then finish early. */
283         idx = armpmu->get_event_idx(cpuc, hwc);
284         if (idx < 0) {
285                 err = idx;
286                 goto out;
287         }
288
289         /*
290          * If there is an event in the counter we are going to use then make
291          * sure it is disabled.
292          */
293         event->hw.idx = idx;
294         armpmu->disable(hwc, idx);
295         cpuc->events[idx] = event;
296         set_bit(idx, cpuc->active_mask);
297
298         /* Set the period for the event. */
299         armpmu_event_set_period(event, hwc, idx);
300
301         /* Enable the event. */
302         armpmu->enable(hwc, idx);
303
304         /* Propagate our changes to the userspace mapping. */
305         perf_event_update_userpage(event);
306
307 out:
308         perf_enable();
309         return err;
310 }
311
312 static struct pmu pmu;
313
314 static int
315 validate_event(struct cpu_hw_events *cpuc,
316                struct perf_event *event)
317 {
318         struct hw_perf_event fake_event = event->hw;
319
320         if (event->pmu && event->pmu != &pmu)
321                 return 0;
322
323         return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
324 }
325
326 static int
327 validate_group(struct perf_event *event)
328 {
329         struct perf_event *sibling, *leader = event->group_leader;
330         struct cpu_hw_events fake_pmu;
331
332         memset(&fake_pmu, 0, sizeof(fake_pmu));
333
334         if (!validate_event(&fake_pmu, leader))
335                 return -ENOSPC;
336
337         list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
338                 if (!validate_event(&fake_pmu, sibling))
339                         return -ENOSPC;
340         }
341
342         if (!validate_event(&fake_pmu, event))
343                 return -ENOSPC;
344
345         return 0;
346 }
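/*
 * Usage note (illustrative): validate_group() dry-runs counter allocation
 * against a zeroed, fake cpu_hw_events so that a group which can never be
 * scheduled together is refused at creation time. On ARMv6, which has the
 * cycle counter plus only two programmable counters, a group of three
 * non-cycle events is rejected here with -ENOSPC up front.
 */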
347
348 static int
349 armpmu_reserve_hardware(void)
350 {
351         int i, err = -ENODEV, irq;
352
353         pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
354         if (IS_ERR(pmu_device)) {
355                 pr_warning("unable to reserve pmu\n");
356                 return PTR_ERR(pmu_device);
357         }
358
359         init_pmu(ARM_PMU_DEVICE_CPU);
360
361         if (pmu_device->num_resources < 1) {
362                 pr_err("no irqs for PMUs defined\n");
363                 return -ENODEV;
364         }
365
366         for (i = 0; i < pmu_device->num_resources; ++i) {
367                 irq = platform_get_irq(pmu_device, i);
368                 if (irq < 0)
369                         continue;
370
371                 err = request_irq(irq, armpmu->handle_irq,
372                                   IRQF_DISABLED | IRQF_NOBALANCING,
373                                   "armpmu", NULL);
374                 if (err) {
375                         pr_warning("unable to request IRQ%d for ARM perf "
376                                 "counters\n", irq);
377                         break;
378                 }
379         }
380
381         if (err) {
382                 for (i = i - 1; i >= 0; --i) {
383                         irq = platform_get_irq(pmu_device, i);
384                         if (irq >= 0)
385                                 free_irq(irq, NULL);
386                 }
387                 release_pmu(pmu_device);
388                 pmu_device = NULL;
389         }
390
391         return err;
392 }
393
394 static void
395 armpmu_release_hardware(void)
396 {
397         int i, irq;
398
399         for (i = pmu_device->num_resources - 1; i >= 0; --i) {
400                 irq = platform_get_irq(pmu_device, i);
401                 if (irq >= 0)
402                         free_irq(irq, NULL);
403         }
404         armpmu->stop();
405
406         release_pmu(pmu_device);
407         pmu_device = NULL;
408 }
409
410 static atomic_t active_events = ATOMIC_INIT(0);
411 static DEFINE_MUTEX(pmu_reserve_mutex);
412
413 static void
414 hw_perf_event_destroy(struct perf_event *event)
415 {
416         if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
417                 armpmu_release_hardware();
418                 mutex_unlock(&pmu_reserve_mutex);
419         }
420 }
421
422 static int
423 __hw_perf_event_init(struct perf_event *event)
424 {
425         struct hw_perf_event *hwc = &event->hw;
426         int mapping, err;
427
428         /* Decode the generic type into an ARM event identifier. */
429         if (PERF_TYPE_HARDWARE == event->attr.type) {
430                 mapping = armpmu->event_map(event->attr.config);
431         } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
432                 mapping = armpmu_map_cache_event(event->attr.config);
433         } else if (PERF_TYPE_RAW == event->attr.type) {
434                 mapping = armpmu->raw_event(event->attr.config);
435         } else {
436                 pr_debug("event type %x not supported\n", event->attr.type);
437                 return -EOPNOTSUPP;
438         }
439
440         if (mapping < 0) {
441                 pr_debug("event %x:%llx not supported\n", event->attr.type,
442                          event->attr.config);
443                 return mapping;
444         }
445
446         /*
447          * Check whether we need to exclude the counter from certain modes.
448          * The ARM performance counters are on all of the time so if someone
449          * has asked us for some excludes then we have to fail.
450          */
451         if (event->attr.exclude_kernel || event->attr.exclude_user ||
452             event->attr.exclude_hv || event->attr.exclude_idle) {
453                 pr_debug("ARM performance counters do not support "
454                          "mode exclusion\n");
455                 return -EPERM;
456         }
457
458         /*
459          * We don't assign an index until we actually place the event onto
460          * hardware. Use -1 to signify that we haven't decided where to put it
461          * yet. For SMP systems, each core has its own PMU so we can't do any
462          * clever allocation or constraints checking at this point.
463          */
464         hwc->idx = -1;
465
466         /*
467          * Store the event encoding into the config_base field. config and
468          * event_base are unused as the only 2 things we need to know are
469          * the event mapping and the counter to use. The counter to use is
470          * also the index and the config_base is the event type.
471          */
472         hwc->config_base            = (unsigned long)mapping;
473         hwc->config                 = 0;
474         hwc->event_base             = 0;
475
476         if (!hwc->sample_period) {
477                 hwc->sample_period  = armpmu->max_period;
478                 hwc->last_period    = hwc->sample_period;
479                 local64_set(&hwc->period_left, hwc->sample_period);
480         }
481
482         err = 0;
483         if (event->group_leader != event) {
484                 err = validate_group(event);
485                 if (err)
486                         return -EINVAL;
487         }
488
489         return err;
490 }
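/*
 * Illustrative sketch (assumed userspace usage, not part of this file): a
 * raw ARMv6 event such as ARMV6_PERFCTR_DCACHE_MISS (0x0B) would be
 * requested with something like
 *
 *      struct perf_event_attr attr = {
 *              .type   = PERF_TYPE_RAW,
 *              .config = 0x0B,
 *      };
 *
 * __hw_perf_event_init() passes attr.config through armpmu->raw_event(),
 * which on ARMv6 simply masks it down to the low 8 bits, and stores the
 * result in hwc->config_base for later programming into the PMCR event
 * field.
 */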
491
492 static int armpmu_event_init(struct perf_event *event)
493 {
494         int err = 0;
495
496         switch (event->attr.type) {
497         case PERF_TYPE_RAW:
498         case PERF_TYPE_HARDWARE:
499         case PERF_TYPE_HW_CACHE:
500                 break;
501
502         default:
503                 return -ENOENT;
504         }
505
506         if (!armpmu)
507                 return -ENODEV;
508
509         event->destroy = hw_perf_event_destroy;
510
511         if (!atomic_inc_not_zero(&active_events)) {
512                 if (atomic_read(&active_events) > perf_max_events) {
513                         atomic_dec(&active_events);
514                         return -ENOSPC;
515                 }
516
517                 mutex_lock(&pmu_reserve_mutex);
518                 if (atomic_read(&active_events) == 0) {
519                         err = armpmu_reserve_hardware();
520                 }
521
522                 if (!err)
523                         atomic_inc(&active_events);
524                 mutex_unlock(&pmu_reserve_mutex);
525         }
526
527         if (err)
528                 return err;
529
530         err = __hw_perf_event_init(event);
531         if (err)
532                 hw_perf_event_destroy(event);
533
534         return err;
535 }
536
537 static struct pmu pmu = {
538         .event_init = armpmu_event_init,
539         .enable     = armpmu_enable,
540         .disable    = armpmu_disable,
541         .unthrottle = armpmu_unthrottle,
542         .read       = armpmu_read,
543 };
544
545 void
546 hw_perf_enable(void)
547 {
548         /* Enable all of the perf events on hardware. */
549         int idx;
550         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
551
552         if (!armpmu)
553                 return;
554
555         for (idx = 0; idx <= armpmu->num_events; ++idx) {
556                 struct perf_event *event = cpuc->events[idx];
557
558                 if (!event)
559                         continue;
560
561                 armpmu->enable(&event->hw, idx);
562         }
563
564         armpmu->start();
565 }
566
567 void
568 hw_perf_disable(void)
569 {
570         if (armpmu)
571                 armpmu->stop();
572 }
573
574 /*
575  * ARMv6 Performance counter handling code.
576  *
577  * ARMv6 has 2 configurable performance counters and a single cycle counter.
578  * They all share a single reset bit but can be written to zero so we can use
579  * that for a reset.
580  *
581  * The counters can't be individually enabled or disabled so when we remove
582  * one event and replace it with another we could get spurious counts from the
583  * wrong event. However, we can take advantage of the fact that the
584  * performance counters can export events to the event bus, and the event bus
585  * itself can be monitored. This requires that we *don't* export the events to
586  * the event bus. The procedure for disabling a configurable counter is:
587  *      - change the counter to count the ETMEXTOUT[0] signal (0x20). This
588  *        effectively stops the counter from counting.
589  *      - disable the counter's interrupt generation (each counter has its
590  *        own interrupt enable bit).
591  * Once stopped, the counter value can be written as 0 to reset.
592  *
593  * To enable a counter:
594  *      - enable the counter's interrupt generation.
595  *      - set the new event type.
596  *
597  * Note: the dedicated cycle counter only counts cycles and can't be
598  * enabled/disabled independently of the others. When we want to disable the
599  * cycle counter, we have to just disable the interrupt reporting and start
600  * ignoring that counter. When re-enabling, we have to reset the value and
601  * enable the interrupt.
602  */
603
604 enum armv6_perf_types {
605         ARMV6_PERFCTR_ICACHE_MISS           = 0x0,
606         ARMV6_PERFCTR_IBUF_STALL            = 0x1,
607         ARMV6_PERFCTR_DDEP_STALL            = 0x2,
608         ARMV6_PERFCTR_ITLB_MISS             = 0x3,
609         ARMV6_PERFCTR_DTLB_MISS             = 0x4,
610         ARMV6_PERFCTR_BR_EXEC               = 0x5,
611         ARMV6_PERFCTR_BR_MISPREDICT         = 0x6,
612         ARMV6_PERFCTR_INSTR_EXEC            = 0x7,
613         ARMV6_PERFCTR_DCACHE_HIT            = 0x9,
614         ARMV6_PERFCTR_DCACHE_ACCESS         = 0xA,
615         ARMV6_PERFCTR_DCACHE_MISS           = 0xB,
616         ARMV6_PERFCTR_DCACHE_WBACK          = 0xC,
617         ARMV6_PERFCTR_SW_PC_CHANGE          = 0xD,
618         ARMV6_PERFCTR_MAIN_TLB_MISS         = 0xF,
619         ARMV6_PERFCTR_EXPL_D_ACCESS         = 0x10,
620         ARMV6_PERFCTR_LSU_FULL_STALL        = 0x11,
621         ARMV6_PERFCTR_WBUF_DRAINED          = 0x12,
622         ARMV6_PERFCTR_CPU_CYCLES            = 0xFF,
623         ARMV6_PERFCTR_NOP                   = 0x20,
624 };
625
626 enum armv6_counters {
627         ARMV6_CYCLE_COUNTER = 1,
628         ARMV6_COUNTER0,
629         ARMV6_COUNTER1,
630 };
631
632 /*
633  * The hardware events that we support. We do support cache operations but
634  * we have Harvard caches and no way to combine instruction and data
635  * accesses/misses in hardware.
636  */
637 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
638         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6_PERFCTR_CPU_CYCLES,
639         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6_PERFCTR_INSTR_EXEC,
640         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
641         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
642         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
643         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6_PERFCTR_BR_MISPREDICT,
644         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
645 };
646
647 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
648                                           [PERF_COUNT_HW_CACHE_OP_MAX]
649                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
650         [C(L1D)] = {
651                 /*
652                  * The performance counters don't differentiate between read
653                  * and write accesses/misses so this isn't strictly correct,
654                  * but it's the best we can do. Writes and reads get
655                  * combined.
656                  */
657                 [C(OP_READ)] = {
658                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
659                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
660                 },
661                 [C(OP_WRITE)] = {
662                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
663                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
664                 },
665                 [C(OP_PREFETCH)] = {
666                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
667                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
668                 },
669         },
670         [C(L1I)] = {
671                 [C(OP_READ)] = {
672                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
673                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
674                 },
675                 [C(OP_WRITE)] = {
676                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
677                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
678                 },
679                 [C(OP_PREFETCH)] = {
680                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
681                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
682                 },
683         },
684         [C(LL)] = {
685                 [C(OP_READ)] = {
686                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
687                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
688                 },
689                 [C(OP_WRITE)] = {
690                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
691                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
692                 },
693                 [C(OP_PREFETCH)] = {
694                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
695                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
696                 },
697         },
698         [C(DTLB)] = {
699                 /*
700                  * The ARM performance counters can count micro DTLB misses,
701                  * micro ITLB misses and main TLB misses. There isn't an event
702                  * for TLB misses, so use the micro misses here and if users
703                  * want the main TLB misses they can use a raw counter.
704                  */
705                 [C(OP_READ)] = {
706                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
707                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
708                 },
709                 [C(OP_WRITE)] = {
710                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
711                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
712                 },
713                 [C(OP_PREFETCH)] = {
714                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
715                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
716                 },
717         },
718         [C(ITLB)] = {
719                 [C(OP_READ)] = {
720                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
721                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
722                 },
723                 [C(OP_WRITE)] = {
724                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
725                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
726                 },
727                 [C(OP_PREFETCH)] = {
728                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
729                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
730                 },
731         },
732         [C(BPU)] = {
733                 [C(OP_READ)] = {
734                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
735                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
736                 },
737                 [C(OP_WRITE)] = {
738                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
739                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
740                 },
741                 [C(OP_PREFETCH)] = {
742                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
743                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
744                 },
745         },
746 };
747
748 enum armv6mpcore_perf_types {
749         ARMV6MPCORE_PERFCTR_ICACHE_MISS     = 0x0,
750         ARMV6MPCORE_PERFCTR_IBUF_STALL      = 0x1,
751         ARMV6MPCORE_PERFCTR_DDEP_STALL      = 0x2,
752         ARMV6MPCORE_PERFCTR_ITLB_MISS       = 0x3,
753         ARMV6MPCORE_PERFCTR_DTLB_MISS       = 0x4,
754         ARMV6MPCORE_PERFCTR_BR_EXEC         = 0x5,
755         ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
756         ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
757         ARMV6MPCORE_PERFCTR_INSTR_EXEC      = 0x8,
758         ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
759         ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
760         ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
761         ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
762         ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
763         ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
764         ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
765         ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
766         ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
767         ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
768         ARMV6MPCORE_PERFCTR_CPU_CYCLES      = 0xFF,
769 };
770
771 /*
772  * The hardware events that we support. We do support cache operations but
773  * we have Harvard caches and no way to combine instruction and data
774  * accesses/misses in hardware.
775  */
776 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
777         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
778         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
779         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
780         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
781         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
782         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
783         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
784 };
785
786 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
787                                         [PERF_COUNT_HW_CACHE_OP_MAX]
788                                         [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
789         [C(L1D)] = {
790                 [C(OP_READ)] = {
791                         [C(RESULT_ACCESS)]  =
792                                 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
793                         [C(RESULT_MISS)]    =
794                                 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
795                 },
796                 [C(OP_WRITE)] = {
797                         [C(RESULT_ACCESS)]  =
798                                 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
799                         [C(RESULT_MISS)]    =
800                                 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
801                 },
802                 [C(OP_PREFETCH)] = {
803                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
804                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
805                 },
806         },
807         [C(L1I)] = {
808                 [C(OP_READ)] = {
809                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
810                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
811                 },
812                 [C(OP_WRITE)] = {
813                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
814                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
815                 },
816                 [C(OP_PREFETCH)] = {
817                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
818                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
819                 },
820         },
821         [C(LL)] = {
822                 [C(OP_READ)] = {
823                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
824                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
825                 },
826                 [C(OP_WRITE)] = {
827                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
828                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
829                 },
830                 [C(OP_PREFETCH)] = {
831                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
832                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
833                 },
834         },
835         [C(DTLB)] = {
836                 /*
837                  * The ARM performance counters can count micro DTLB misses,
838                  * micro ITLB misses and main TLB misses. There isn't an event
839                  * for TLB misses, so use the micro misses here and if users
840                  * want the main TLB misses they can use a raw counter.
841                  */
842                 [C(OP_READ)] = {
843                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
844                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
845                 },
846                 [C(OP_WRITE)] = {
847                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
848                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
849                 },
850                 [C(OP_PREFETCH)] = {
851                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
852                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
853                 },
854         },
855         [C(ITLB)] = {
856                 [C(OP_READ)] = {
857                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
858                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
859                 },
860                 [C(OP_WRITE)] = {
861                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
862                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
863                 },
864                 [C(OP_PREFETCH)] = {
865                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
866                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
867                 },
868         },
869         [C(BPU)] = {
870                 [C(OP_READ)] = {
871                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
872                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
873                 },
874                 [C(OP_WRITE)] = {
875                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
876                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
877                 },
878                 [C(OP_PREFETCH)] = {
879                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
880                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
881                 },
882         },
883 };
884
885 static inline unsigned long
886 armv6_pmcr_read(void)
887 {
888         u32 val;
889         asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
890         return val;
891 }
892
893 static inline void
894 armv6_pmcr_write(unsigned long val)
895 {
896         asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
897 }
898
899 #define ARMV6_PMCR_ENABLE               (1 << 0)
900 #define ARMV6_PMCR_CTR01_RESET          (1 << 1)
901 #define ARMV6_PMCR_CCOUNT_RESET         (1 << 2)
902 #define ARMV6_PMCR_CCOUNT_DIV           (1 << 3)
903 #define ARMV6_PMCR_COUNT0_IEN           (1 << 4)
904 #define ARMV6_PMCR_COUNT1_IEN           (1 << 5)
905 #define ARMV6_PMCR_CCOUNT_IEN           (1 << 6)
906 #define ARMV6_PMCR_COUNT0_OVERFLOW      (1 << 8)
907 #define ARMV6_PMCR_COUNT1_OVERFLOW      (1 << 9)
908 #define ARMV6_PMCR_CCOUNT_OVERFLOW      (1 << 10)
909 #define ARMV6_PMCR_EVT_COUNT0_SHIFT     20
910 #define ARMV6_PMCR_EVT_COUNT0_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
911 #define ARMV6_PMCR_EVT_COUNT1_SHIFT     12
912 #define ARMV6_PMCR_EVT_COUNT1_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
913
914 #define ARMV6_PMCR_OVERFLOWED_MASK \
915         (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
916          ARMV6_PMCR_CCOUNT_OVERFLOW)
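/*
 * Illustrative sketch of how these bits combine (see
 * armv6pmu_enable_event() below for the real sequence, which also takes
 * pmu_lock around the read-modify-write): making counter 0 count executed
 * instructions (event 0x7) with its overflow interrupt enabled amounts to
 *
 *      val  = armv6_pmcr_read();
 *      val &= ~ARMV6_PMCR_EVT_COUNT0_MASK;
 *      val |= (ARMV6_PERFCTR_INSTR_EXEC << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
 *             ARMV6_PMCR_COUNT0_IEN;
 *      armv6_pmcr_write(val);
 */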
917
918 static inline int
919 armv6_pmcr_has_overflowed(unsigned long pmcr)
920 {
921         return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
922 }
923
924 static inline int
925 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
926                                   enum armv6_counters counter)
927 {
928         int ret = 0;
929
930         if (ARMV6_CYCLE_COUNTER == counter)
931                 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
932         else if (ARMV6_COUNTER0 == counter)
933                 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
934         else if (ARMV6_COUNTER1 == counter)
935                 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
936         else
937                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
938
939         return ret;
940 }
941
942 static inline u32
943 armv6pmu_read_counter(int counter)
944 {
945         unsigned long value = 0;
946
947         if (ARMV6_CYCLE_COUNTER == counter)
948                 asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
949         else if (ARMV6_COUNTER0 == counter)
950                 asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
951         else if (ARMV6_COUNTER1 == counter)
952                 asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
953         else
954                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
955
956         return value;
957 }
958
959 static inline void
960 armv6pmu_write_counter(int counter,
961                        u32 value)
962 {
963         if (ARMV6_CYCLE_COUNTER == counter)
964                 asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
965         else if (ARMV6_COUNTER0 == counter)
966                 asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
967         else if (ARMV6_COUNTER1 == counter)
968                 asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
969         else
970                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
971 }
972
973 void
974 armv6pmu_enable_event(struct hw_perf_event *hwc,
975                       int idx)
976 {
977         unsigned long val, mask, evt, flags;
978
979         if (ARMV6_CYCLE_COUNTER == idx) {
980                 mask    = 0;
981                 evt     = ARMV6_PMCR_CCOUNT_IEN;
982         } else if (ARMV6_COUNTER0 == idx) {
983                 mask    = ARMV6_PMCR_EVT_COUNT0_MASK;
984                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
985                           ARMV6_PMCR_COUNT0_IEN;
986         } else if (ARMV6_COUNTER1 == idx) {
987                 mask    = ARMV6_PMCR_EVT_COUNT1_MASK;
988                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
989                           ARMV6_PMCR_COUNT1_IEN;
990         } else {
991                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
992                 return;
993         }
994
995         /*
996          * Mask out the current event and set the counter to count the event
997          * that we're interested in.
998          */
999         spin_lock_irqsave(&pmu_lock, flags);
1000         val = armv6_pmcr_read();
1001         val &= ~mask;
1002         val |= evt;
1003         armv6_pmcr_write(val);
1004         spin_unlock_irqrestore(&pmu_lock, flags);
1005 }
1006
1007 static irqreturn_t
1008 armv6pmu_handle_irq(int irq_num,
1009                     void *dev)
1010 {
1011         unsigned long pmcr = armv6_pmcr_read();
1012         struct perf_sample_data data;
1013         struct cpu_hw_events *cpuc;
1014         struct pt_regs *regs;
1015         int idx;
1016
1017         if (!armv6_pmcr_has_overflowed(pmcr))
1018                 return IRQ_NONE;
1019
1020         regs = get_irq_regs();
1021
1022         /*
1023          * The interrupts are cleared by writing the overflow flags back to
1024          * the control register. All of the other bits don't have any effect
1025          * if they are rewritten, so write the whole value back.
1026          */
1027         armv6_pmcr_write(pmcr);
1028
1029         perf_sample_data_init(&data, 0);
1030
1031         cpuc = &__get_cpu_var(cpu_hw_events);
1032         for (idx = 0; idx <= armpmu->num_events; ++idx) {
1033                 struct perf_event *event = cpuc->events[idx];
1034                 struct hw_perf_event *hwc;
1035
1036                 if (!test_bit(idx, cpuc->active_mask))
1037                         continue;
1038
1039                 /*
1040                  * We have a single interrupt for all counters. Check that
1041                  * each counter has overflowed before we process it.
1042                  */
1043                 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1044                         continue;
1045
1046                 hwc = &event->hw;
1047                 armpmu_event_update(event, hwc, idx);
1048                 data.period = event->hw.last_period;
1049                 if (!armpmu_event_set_period(event, hwc, idx))
1050                         continue;
1051
1052                 if (perf_event_overflow(event, 0, &data, regs))
1053                         armpmu->disable(hwc, idx);
1054         }
1055
1056         /*
1057          * Handle the pending perf events.
1058          *
1059          * Note: this call *must* be run with interrupts enabled. For
1060          * platforms that can have the PMU interrupts raised as a PMI, this
1061          * will not work.
1062          */
1063         perf_event_do_pending();
1064
1065         return IRQ_HANDLED;
1066 }
1067
1068 static void
1069 armv6pmu_start(void)
1070 {
1071         unsigned long flags, val;
1072
1073         spin_lock_irqsave(&pmu_lock, flags);
1074         val = armv6_pmcr_read();
1075         val |= ARMV6_PMCR_ENABLE;
1076         armv6_pmcr_write(val);
1077         spin_unlock_irqrestore(&pmu_lock, flags);
1078 }
1079
1080 void
1081 armv6pmu_stop(void)
1082 {
1083         unsigned long flags, val;
1084
1085         spin_lock_irqsave(&pmu_lock, flags);
1086         val = armv6_pmcr_read();
1087         val &= ~ARMV6_PMCR_ENABLE;
1088         armv6_pmcr_write(val);
1089         spin_unlock_irqrestore(&pmu_lock, flags);
1090 }
1091
1092 static inline int
1093 armv6pmu_event_map(int config)
1094 {
1095         int mapping = armv6_perf_map[config];
1096         if (HW_OP_UNSUPPORTED == mapping)
1097                 mapping = -EOPNOTSUPP;
1098         return mapping;
1099 }
1100
1101 static inline int
1102 armv6mpcore_pmu_event_map(int config)
1103 {
1104         int mapping = armv6mpcore_perf_map[config];
1105         if (HW_OP_UNSUPPORTED == mapping)
1106                 mapping = -EOPNOTSUPP;
1107         return mapping;
1108 }
1109
1110 static u64
1111 armv6pmu_raw_event(u64 config)
1112 {
1113         return config & 0xff;
1114 }
1115
1116 static int
1117 armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1118                        struct hw_perf_event *event)
1119 {
1120         /* Always place a cycle counter into the cycle counter. */
1121         if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1122                 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1123                         return -EAGAIN;
1124
1125                 return ARMV6_CYCLE_COUNTER;
1126         } else {
1127                 /*
1128                  * For anything other than a cycle counter, try and use
1129                  * counter0 and counter1.
1130                  */
1131                 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1132                         return ARMV6_COUNTER1;
1133                 }
1134
1135                 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1136                         return ARMV6_COUNTER0;
1137                 }
1138
1139                 /* The counters are all in use. */
1140                 return -EAGAIN;
1141         }
1142 }
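/*
 * Allocation example (illustrative): a cycle event always claims
 * ARMV6_CYCLE_COUNTER, so one CPU-cycles event plus two other hardware
 * events fills all three ARMv6 counters; any further event on that CPU
 * gets -EAGAIN from armv6pmu_get_event_idx() and cannot be placed on the
 * PMU until a counter is freed.
 */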
1143
1144 static void
1145 armv6pmu_disable_event(struct hw_perf_event *hwc,
1146                        int idx)
1147 {
1148         unsigned long val, mask, evt, flags;
1149
1150         if (ARMV6_CYCLE_COUNTER == idx) {
1151                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1152                 evt     = 0;
1153         } else if (ARMV6_COUNTER0 == idx) {
1154                 mask    = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1155                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1156         } else if (ARMV6_COUNTER1 == idx) {
1157                 mask    = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1158                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1159         } else {
1160                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1161                 return;
1162         }
1163
1164         /*
1165          * Mask out the current event and set the counter to count the number
1166          * of ETM bus signal assertion cycles. The external reporting should
1167          * be disabled and so this should never increment.
1168          */
1169         spin_lock_irqsave(&pmu_lock, flags);
1170         val = armv6_pmcr_read();
1171         val &= ~mask;
1172         val |= evt;
1173         armv6_pmcr_write(val);
1174         spin_unlock_irqrestore(&pmu_lock, flags);
1175 }
1176
1177 static void
1178 armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1179                               int idx)
1180 {
1181         unsigned long val, mask, flags, evt = 0;
1182
1183         if (ARMV6_CYCLE_COUNTER == idx) {
1184                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1185         } else if (ARMV6_COUNTER0 == idx) {
1186                 mask    = ARMV6_PMCR_COUNT0_IEN;
1187         } else if (ARMV6_COUNTER1 == idx) {
1188                 mask    = ARMV6_PMCR_COUNT1_IEN;
1189         } else {
1190                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1191                 return;
1192         }
1193
1194         /*
1195          * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1196          * simply disable the interrupt reporting.
1197          */
1198         spin_lock_irqsave(&pmu_lock, flags);
1199         val = armv6_pmcr_read();
1200         val &= ~mask;
1201         val |= evt;
1202         armv6_pmcr_write(val);
1203         spin_unlock_irqrestore(&pmu_lock, flags);
1204 }
1205
1206 static const struct arm_pmu armv6pmu = {
1207         .id                     = ARM_PERF_PMU_ID_V6,
1208         .handle_irq             = armv6pmu_handle_irq,
1209         .enable                 = armv6pmu_enable_event,
1210         .disable                = armv6pmu_disable_event,
1211         .event_map              = armv6pmu_event_map,
1212         .raw_event              = armv6pmu_raw_event,
1213         .read_counter           = armv6pmu_read_counter,
1214         .write_counter          = armv6pmu_write_counter,
1215         .get_event_idx          = armv6pmu_get_event_idx,
1216         .start                  = armv6pmu_start,
1217         .stop                   = armv6pmu_stop,
1218         .num_events             = 3,
1219         .max_period             = (1LLU << 32) - 1,
1220 };
1221
1222 /*
1223  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1224  * that some of the events have different enumerations and that there is no
1225  * *hack* to stop the programmable counters. To stop the counters we simply
1226  * disable the interrupt reporting and update the event. When unthrottling we
1227  * reset the period and enable the interrupt reporting.
1228  */
1229 static const struct arm_pmu armv6mpcore_pmu = {
1230         .id                     = ARM_PERF_PMU_ID_V6MP,
1231         .handle_irq             = armv6pmu_handle_irq,
1232         .enable                 = armv6pmu_enable_event,
1233         .disable                = armv6mpcore_pmu_disable_event,
1234         .event_map              = armv6mpcore_pmu_event_map,
1235         .raw_event              = armv6pmu_raw_event,
1236         .read_counter           = armv6pmu_read_counter,
1237         .write_counter          = armv6pmu_write_counter,
1238         .get_event_idx          = armv6pmu_get_event_idx,
1239         .start                  = armv6pmu_start,
1240         .stop                   = armv6pmu_stop,
1241         .num_events             = 3,
1242         .max_period             = (1LLU << 32) - 1,
1243 };
1244
1245 /*
1246  * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
1247  *
1248  * Copied from ARMv6 code, with the low level code inspired
1249  *  by the ARMv7 Oprofile code.
1250  *
1251  * Cortex-A8 has up to 4 configurable performance counters and
1252  *  a single cycle counter.
1253  * Cortex-A9 has up to 31 configurable performance counters and
1254  *  a single cycle counter.
1255  *
1256  * All counters can be enabled/disabled and IRQ masked separately. The cycle
1257  *  counter and the event counters (as a group) can each be reset separately.
1258  */
1259
1260 /* Common ARMv7 event types */
1261 enum armv7_perf_types {
1262         ARMV7_PERFCTR_PMNC_SW_INCR              = 0x00,
1263         ARMV7_PERFCTR_IFETCH_MISS               = 0x01,
1264         ARMV7_PERFCTR_ITLB_MISS                 = 0x02,
1265         ARMV7_PERFCTR_DCACHE_REFILL             = 0x03,
1266         ARMV7_PERFCTR_DCACHE_ACCESS             = 0x04,
1267         ARMV7_PERFCTR_DTLB_REFILL               = 0x05,
1268         ARMV7_PERFCTR_DREAD                     = 0x06,
1269         ARMV7_PERFCTR_DWRITE                    = 0x07,
1270
1271         ARMV7_PERFCTR_EXC_TAKEN                 = 0x09,
1272         ARMV7_PERFCTR_EXC_EXECUTED              = 0x0A,
1273         ARMV7_PERFCTR_CID_WRITE                 = 0x0B,
1274         /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
1275          * It counts:
1276          *  - all branch instructions,
1277          *  - instructions that explicitly write the PC,
1278          *  - exception generating instructions.
1279          */
1280         ARMV7_PERFCTR_PC_WRITE                  = 0x0C,
1281         ARMV7_PERFCTR_PC_IMM_BRANCH             = 0x0D,
1282         ARMV7_PERFCTR_UNALIGNED_ACCESS          = 0x0F,
1283         ARMV7_PERFCTR_PC_BRANCH_MIS_PRED        = 0x10,
1284         ARMV7_PERFCTR_CLOCK_CYCLES              = 0x11,
1285
1286         ARMV7_PERFCTR_PC_BRANCH_MIS_USED        = 0x12,
1287
1288         ARMV7_PERFCTR_CPU_CYCLES                = 0xFF
1289 };
1290
1291 /* ARMv7 Cortex-A8 specific event types */
1292 enum armv7_a8_perf_types {
1293         ARMV7_PERFCTR_INSTR_EXECUTED            = 0x08,
1294
1295         ARMV7_PERFCTR_PC_PROC_RETURN            = 0x0E,
1296
1297         ARMV7_PERFCTR_WRITE_BUFFER_FULL         = 0x40,
1298         ARMV7_PERFCTR_L2_STORE_MERGED           = 0x41,
1299         ARMV7_PERFCTR_L2_STORE_BUFF             = 0x42,
1300         ARMV7_PERFCTR_L2_ACCESS                 = 0x43,
1301         ARMV7_PERFCTR_L2_CACH_MISS              = 0x44,
1302         ARMV7_PERFCTR_AXI_READ_CYCLES           = 0x45,
1303         ARMV7_PERFCTR_AXI_WRITE_CYCLES          = 0x46,
1304         ARMV7_PERFCTR_MEMORY_REPLAY             = 0x47,
1305         ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY   = 0x48,
1306         ARMV7_PERFCTR_L1_DATA_MISS              = 0x49,
1307         ARMV7_PERFCTR_L1_INST_MISS              = 0x4A,
1308         ARMV7_PERFCTR_L1_DATA_COLORING          = 0x4B,
1309         ARMV7_PERFCTR_L1_NEON_DATA              = 0x4C,
1310         ARMV7_PERFCTR_L1_NEON_CACH_DATA         = 0x4D,
1311         ARMV7_PERFCTR_L2_NEON                   = 0x4E,
1312         ARMV7_PERFCTR_L2_NEON_HIT               = 0x4F,
1313         ARMV7_PERFCTR_L1_INST                   = 0x50,
1314         ARMV7_PERFCTR_PC_RETURN_MIS_PRED        = 0x51,
1315         ARMV7_PERFCTR_PC_BRANCH_FAILED          = 0x52,
1316         ARMV7_PERFCTR_PC_BRANCH_TAKEN           = 0x53,
1317         ARMV7_PERFCTR_PC_BRANCH_EXECUTED        = 0x54,
1318         ARMV7_PERFCTR_OP_EXECUTED               = 0x55,
1319         ARMV7_PERFCTR_CYCLES_INST_STALL         = 0x56,
1320         ARMV7_PERFCTR_CYCLES_INST               = 0x57,
1321         ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL    = 0x58,
1322         ARMV7_PERFCTR_CYCLES_NEON_INST_STALL    = 0x59,
1323         ARMV7_PERFCTR_NEON_CYCLES               = 0x5A,
1324
1325         ARMV7_PERFCTR_PMU0_EVENTS               = 0x70,
1326         ARMV7_PERFCTR_PMU1_EVENTS               = 0x71,
1327         ARMV7_PERFCTR_PMU_EVENTS                = 0x72,
1328 };
1329
1330 /* ARMv7 Cortex-A9 specific event types */
1331 enum armv7_a9_perf_types {
1332         ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC     = 0x40,
1333         ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC     = 0x41,
1334         ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC       = 0x42,
1335
1336         ARMV7_PERFCTR_COHERENT_LINE_MISS        = 0x50,
1337         ARMV7_PERFCTR_COHERENT_LINE_HIT         = 0x51,
1338
1339         ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES   = 0x60,
1340         ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES   = 0x61,
1341         ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
1342         ARMV7_PERFCTR_STREX_EXECUTED_PASSED     = 0x63,
1343         ARMV7_PERFCTR_STREX_EXECUTED_FAILED     = 0x64,
1344         ARMV7_PERFCTR_DATA_EVICTION             = 0x65,
1345         ARMV7_PERFCTR_ISSUE_STAGE_NO_INST       = 0x66,
1346         ARMV7_PERFCTR_ISSUE_STAGE_EMPTY         = 0x67,
1347         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE  = 0x68,
1348
1349         ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
1350
1351         ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST   = 0x70,
1352         ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
1353         ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST  = 0x72,
1354         ARMV7_PERFCTR_FP_EXECUTED_INST          = 0x73,
1355         ARMV7_PERFCTR_NEON_EXECUTED_INST        = 0x74,
1356
1357         ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
1358         ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES  = 0x81,
1359         ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES        = 0x82,
1360         ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES        = 0x83,
1361         ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES  = 0x84,
1362         ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES  = 0x85,
1363         ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES      = 0x86,
1364
1365         ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES  = 0x8A,
1366         ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
1367
1368         ARMV7_PERFCTR_ISB_INST                  = 0x90,
1369         ARMV7_PERFCTR_DSB_INST                  = 0x91,
1370         ARMV7_PERFCTR_DMB_INST                  = 0x92,
1371         ARMV7_PERFCTR_EXT_INTERRUPTS            = 0x93,
1372
1373         ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED     = 0xA0,
1374         ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED       = 0xA1,
1375         ARMV7_PERFCTR_PLE_FIFO_FLUSH            = 0xA2,
1376         ARMV7_PERFCTR_PLE_RQST_COMPLETED        = 0xA3,
1377         ARMV7_PERFCTR_PLE_FIFO_OVERFLOW         = 0xA4,
1378         ARMV7_PERFCTR_PLE_RQST_PROG             = 0xA5
1379 };
1380
1381 /*
1382  * Cortex-A8 HW events mapping
1383  *
1384  * The hardware events that we support. We do support cache operations but
1385  * we have Harvard caches and no way to combine instruction and data
1386  * accesses/misses in hardware.
1387  */
1388 static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1389         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1390         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV7_PERFCTR_INSTR_EXECUTED,
1391         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
1392         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
1393         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1394         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1395         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1396 };
1397
1398 static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1399                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1400                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1401         [C(L1D)] = {
1402                 /*
1403                  * The performance counters don't differentiate between read
1404                  * and write accesses/misses so this isn't strictly correct,
1405                  * but it's the best we can do. Writes and reads get
1406                  * combined.
1407                  */
1408                 [C(OP_READ)] = {
1409                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1410                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1411                 },
1412                 [C(OP_WRITE)] = {
1413                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1414                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1415                 },
1416                 [C(OP_PREFETCH)] = {
1417                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1418                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1419                 },
1420         },
1421         [C(L1I)] = {
1422                 [C(OP_READ)] = {
1423                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1424                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1425                 },
1426                 [C(OP_WRITE)] = {
1427                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1428                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1429                 },
1430                 [C(OP_PREFETCH)] = {
1431                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1432                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1433                 },
1434         },
1435         [C(LL)] = {
1436                 [C(OP_READ)] = {
1437                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1438                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1439                 },
1440                 [C(OP_WRITE)] = {
1441                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1442                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1443                 },
1444                 [C(OP_PREFETCH)] = {
1445                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1446                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1447                 },
1448         },
1449         [C(DTLB)] = {
1450                 /*
1451                  * Only ITLB misses and DTLB refills are supported.
1452                  * For any other DTLB event a raw counter
1453                  * must be used.
1454                  */
1455                 [C(OP_READ)] = {
1456                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1457                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1458                 },
1459                 [C(OP_WRITE)] = {
1460                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1461                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1462                 },
1463                 [C(OP_PREFETCH)] = {
1464                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1465                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1466                 },
1467         },
1468         [C(ITLB)] = {
1469                 [C(OP_READ)] = {
1470                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1471                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1472                 },
1473                 [C(OP_WRITE)] = {
1474                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1475                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1476                 },
1477                 [C(OP_PREFETCH)] = {
1478                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1479                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1480                 },
1481         },
1482         [C(BPU)] = {
1483                 [C(OP_READ)] = {
1484                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1485                         [C(RESULT_MISS)]
1486                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1487                 },
1488                 [C(OP_WRITE)] = {
1489                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1490                         [C(RESULT_MISS)]
1491                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1492                 },
1493                 [C(OP_PREFETCH)] = {
1494                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1495                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1496                 },
1497         },
1498 };
1499
1500 /*
1501  * Cortex-A9 HW events mapping
1502  */
1503 static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1504         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1505         [PERF_COUNT_HW_INSTRUCTIONS]        =
1506                                         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1507         [PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1508         [PERF_COUNT_HW_CACHE_MISSES]        = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1509         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1510         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1511         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1512 };
1513
1514 static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1515                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1516                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1517         [C(L1D)] = {
1518                 /*
1519                  * The performance counters don't differentiate between read
1520                  * and write accesses/misses so this isn't strictly correct,
1521                  * but it's the best we can do. Writes and reads get
1522                  * combined.
1523                  */
1524                 [C(OP_READ)] = {
1525                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1526                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1527                 },
1528                 [C(OP_WRITE)] = {
1529                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1530                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1531                 },
1532                 [C(OP_PREFETCH)] = {
1533                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1534                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1535                 },
1536         },
1537         [C(L1I)] = {
1538                 [C(OP_READ)] = {
1539                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1540                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1541                 },
1542                 [C(OP_WRITE)] = {
1543                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1544                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1545                 },
1546                 [C(OP_PREFETCH)] = {
1547                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1548                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1549                 },
1550         },
1551         [C(LL)] = {
1552                 [C(OP_READ)] = {
1553                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1554                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1555                 },
1556                 [C(OP_WRITE)] = {
1557                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1558                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1559                 },
1560                 [C(OP_PREFETCH)] = {
1561                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1562                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1563                 },
1564         },
1565         [C(DTLB)] = {
1566                 /*
1567                  * Only ITLB misses and DTLB refills are supported.
1568                  * For any other DTLB event a raw counter
1569                  * must be used.
1570                  */
1571                 [C(OP_READ)] = {
1572                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1573                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1574                 },
1575                 [C(OP_WRITE)] = {
1576                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1577                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1578                 },
1579                 [C(OP_PREFETCH)] = {
1580                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1581                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1582                 },
1583         },
1584         [C(ITLB)] = {
1585                 [C(OP_READ)] = {
1586                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1587                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1588                 },
1589                 [C(OP_WRITE)] = {
1590                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1591                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1592                 },
1593                 [C(OP_PREFETCH)] = {
1594                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1595                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1596                 },
1597         },
1598         [C(BPU)] = {
1599                 [C(OP_READ)] = {
1600                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1601                         [C(RESULT_MISS)]
1602                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1603                 },
1604                 [C(OP_WRITE)] = {
1605                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1606                         [C(RESULT_MISS)]
1607                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1608                 },
1609                 [C(OP_PREFETCH)] = {
1610                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1611                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1612                 },
1613         },
1614 };
1615
1616 /*
1617  * Perf Events counters
1618  */
1619 enum armv7_counters {
1620         ARMV7_CYCLE_COUNTER             = 1,    /* Cycle counter */
1621         ARMV7_COUNTER0                  = 2,    /* First event counter */
1622 };
1623
1624 /*
1625  * The cycle counter is ARMV7_CYCLE_COUNTER.
1626  * The first event counter is ARMV7_COUNTER0.
1627  * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
1628  */
1629 #define ARMV7_COUNTER_LAST      (ARMV7_COUNTER0 + armpmu->num_events - 1)
1630
1631 /*
1632  * ARMv7 low level PMNC access
1633  */
1634
1635 /*
1636  * Per-CPU PMNC: config reg
1637  */
1638 #define ARMV7_PMNC_E            (1 << 0) /* Enable all counters */
1639 #define ARMV7_PMNC_P            (1 << 1) /* Reset all counters */
1640 #define ARMV7_PMNC_C            (1 << 2) /* Cycle counter reset */
1641 #define ARMV7_PMNC_D            (1 << 3) /* CCNT counts every 64th cpu cycle */
1642 #define ARMV7_PMNC_X            (1 << 4) /* Export to ETM */
1643 #define ARMV7_PMNC_DP           (1 << 5) /* Disable CCNT if non-invasive debug */
1644 #define ARMV7_PMNC_N_SHIFT      11       /* Number of counters supported */
1645 #define ARMV7_PMNC_N_MASK       0x1f
1646 #define ARMV7_PMNC_MASK         0x3f     /* Mask for writable bits */
1647
1648 /*
1649  * Available counters
1650  */
1651 #define ARMV7_CNT0              0       /* First event counter */
1652 #define ARMV7_CCNT              31      /* Cycle counter */
1653
1654 /* Perf Event to low level counters mapping */
1655 #define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
1656
1657 /*
1658  * CNTENS: counters enable reg
1659  */
1660 #define ARMV7_CNTENS_P(idx)     (1 << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
1661 #define ARMV7_CNTENS_C          (1 << ARMV7_CCNT)
1662
1663 /*
1664  * CNTENC: counters disable reg
1665  */
1666 #define ARMV7_CNTENC_P(idx)     (1 << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
1667 #define ARMV7_CNTENC_C          (1 << ARMV7_CCNT)
1668
1669 /*
1670  * INTENS: counters overflow interrupt enable reg
1671  */
1672 #define ARMV7_INTENS_P(idx)     (1 << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
1673 #define ARMV7_INTENS_C          (1 << ARMV7_CCNT)
1674
1675 /*
1676  * INTENC: counters overflow interrupt disable reg
1677  */
1678 #define ARMV7_INTENC_P(idx)     (1 << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
1679 #define ARMV7_INTENC_C          (1 << ARMV7_CCNT)
1680
1681 /*
1682  * EVTSEL: Event selection reg
1683  */
1684 #define ARMV7_EVTSEL_MASK       0xff            /* Mask for writable bits */
1685
1686 /*
1687  * SELECT: Counter selection reg
1688  */
1689 #define ARMV7_SELECT_MASK       0x1f            /* Mask for writable bits */
1690
1691 /*
1692  * FLAG: counters overflow flag status reg
1693  */
1694 #define ARMV7_FLAG_P(idx)       (1 << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
1695 #define ARMV7_FLAG_C            (1 << ARMV7_CCNT)
1696 #define ARMV7_FLAG_MASK         0xffffffff      /* Mask for writable bits */
1697 #define ARMV7_OVERFLOWED_MASK   ARMV7_FLAG_MASK
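/*
 * Worked example of the index translation (values assumed for illustration:
 * a PMU with four CNTx event counters plus CCNT, so armpmu->num_events == 5):
 *
 *	ARMV7_CYCLE_COUNTER (idx 1)  -> the C bit, 1 << ARMV7_CCNT == bit 31
 *	idx 2 (ARMV7_COUNTER0)       -> 1 << (2 - ARMV7_EVENT_CNT_TO_CNTx) == bit 0 (CNT0)
 *	idx 5                        -> 1 << (5 - ARMV7_EVENT_CNT_TO_CNTx) == bit 3 (CNT3)
 *
 * i.e. the perf idx space used throughout this file is offset by
 * ARMV7_EVENT_CNT_TO_CNTx (== 2) from the hardware counter numbering that
 * the CNTENS/CNTENC/INTENS/INTENC/FLAG registers expect.
 */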
1698
1699 static inline unsigned long armv7_pmnc_read(void)
1700 {
1701         u32 val;
1702         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1703         return val;
1704 }
1705
1706 static inline void armv7_pmnc_write(unsigned long val)
1707 {
1708         val &= ARMV7_PMNC_MASK;
1709         asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1710 }
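/*
 * Usage sketch (illustrative only): these two helpers are meant to be used
 * as a read/modify/write pair under pmu_lock, for example to make CCNT tick
 * once every 64 cycles:
 *
 *	spin_lock_irqsave(&pmu_lock, flags);
 *	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_D);
 *	spin_unlock_irqrestore(&pmu_lock, flags);
 *
 * This file itself only flips ARMV7_PMNC_E this way, in armv7pmu_start() and
 * armv7pmu_stop() below.
 */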
1711
1712 static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1713 {
1714         return pmnc & ARMV7_OVERFLOWED_MASK;
1715 }
1716
1717 static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1718                                         enum armv7_counters counter)
1719 {
1720         int ret = 0;
1721
1722         if (counter == ARMV7_CYCLE_COUNTER)
1723                 ret = pmnc & ARMV7_FLAG_C;
1724         else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1725                 ret = pmnc & ARMV7_FLAG_P(counter);
1726         else
1727                 pr_err("CPU%u checking wrong counter %d overflow status\n",
1728                         smp_processor_id(), counter);
1729
1730         return ret;
1731 }
1732
1733 static inline int armv7_pmnc_select_counter(unsigned int idx)
1734 {
1735         u32 val;
1736
1737         if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1738                 pr_err("CPU%u selecting wrong PMNC counter"
1739                         " %d\n", smp_processor_id(), idx);
1740                 return -1;
1741         }
1742
1743         val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1744         asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1745
1746         return idx;
1747 }
1748
1749 static inline u32 armv7pmu_read_counter(int idx)
1750 {
1751         unsigned long value = 0;
1752
1753         if (idx == ARMV7_CYCLE_COUNTER)
1754                 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1755         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1756                 if (armv7_pmnc_select_counter(idx) == idx)
1757                         asm volatile("mrc p15, 0, %0, c9, c13, 2"
1758                                      : "=r" (value));
1759         } else
1760                 pr_err("CPU%u reading wrong counter %d\n",
1761                         smp_processor_id(), idx);
1762
1763         return value;
1764 }
1765
1766 static inline void armv7pmu_write_counter(int idx, u32 value)
1767 {
1768         if (idx == ARMV7_CYCLE_COUNTER)
1769                 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1770         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1771                 if (armv7_pmnc_select_counter(idx) == idx)
1772                         asm volatile("mcr p15, 0, %0, c9, c13, 2"
1773                                      : : "r" (value));
1774         } else
1775                 pr_err("CPU%u writing wrong counter %d\n",
1776                         smp_processor_id(), idx);
1777 }
1778
1779 static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1780 {
1781         if (armv7_pmnc_select_counter(idx) == idx) {
1782                 val &= ARMV7_EVTSEL_MASK;
1783                 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1784         }
1785 }
1786
1787 static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1788 {
1789         u32 val;
1790
1791         if ((idx != ARMV7_CYCLE_COUNTER) &&
1792             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1793                 pr_err("CPU%u enabling wrong PMNC counter"
1794                         " %d\n", smp_processor_id(), idx);
1795                 return -1;
1796         }
1797
1798         if (idx == ARMV7_CYCLE_COUNTER)
1799                 val = ARMV7_CNTENS_C;
1800         else
1801                 val = ARMV7_CNTENS_P(idx);
1802
1803         asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1804
1805         return idx;
1806 }
1807
1808 static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1809 {
1810         u32 val;
1811
1812
1813         if ((idx != ARMV7_CYCLE_COUNTER) &&
1814             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1815                 pr_err("CPU%u disabling wrong PMNC counter"
1816                         " %d\n", smp_processor_id(), idx);
1817                 return -1;
1818         }
1819
1820         if (idx == ARMV7_CYCLE_COUNTER)
1821                 val = ARMV7_CNTENC_C;
1822         else
1823                 val = ARMV7_CNTENC_P(idx);
1824
1825         asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1826
1827         return idx;
1828 }
1829
1830 static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1831 {
1832         u32 val;
1833
1834         if ((idx != ARMV7_CYCLE_COUNTER) &&
1835             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1836                 pr_err("CPU%u enabling wrong PMNC counter"
1837                         " interrupt enable %d\n", smp_processor_id(), idx);
1838                 return -1;
1839         }
1840
1841         if (idx == ARMV7_CYCLE_COUNTER)
1842                 val = ARMV7_INTENS_C;
1843         else
1844                 val = ARMV7_INTENS_P(idx);
1845
1846         asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1847
1848         return idx;
1849 }
1850
1851 static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1852 {
1853         u32 val;
1854
1855         if ((idx != ARMV7_CYCLE_COUNTER) &&
1856             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1857                 pr_err("CPU%u disabling wrong PMNC counter"
1858                         " interrupt enable %d\n", smp_processor_id(), idx);
1859                 return -1;
1860         }
1861
1862         if (idx == ARMV7_CYCLE_COUNTER)
1863                 val = ARMV7_INTENC_C;
1864         else
1865                 val = ARMV7_INTENC_P(idx);
1866
1867         asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1868
1869         return idx;
1870 }
1871
1872 static inline u32 armv7_pmnc_getreset_flags(void)
1873 {
1874         u32 val;
1875
1876         /* Read */
1877         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1878
1879         /* Write to clear flags */
1880         val &= ARMV7_FLAG_MASK;
1881         asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1882
1883         return val;
1884 }
1885
1886 #ifdef DEBUG
1887 static void armv7_pmnc_dump_regs(void)
1888 {
1889         u32 val;
1890         unsigned int cnt;
1891
1892         printk(KERN_INFO "PMNC registers dump:\n");
1893
1894         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
1895         printk(KERN_INFO "PMNC  =0x%08x\n", val);
1896
1897         asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
1898         printk(KERN_INFO "CNTENS=0x%08x\n", val);
1899
1900         asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
1901         printk(KERN_INFO "INTENS=0x%08x\n", val);
1902
1903         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1904         printk(KERN_INFO "FLAGS =0x%08x\n", val);
1905
1906         asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
1907         printk(KERN_INFO "SELECT=0x%08x\n", val);
1908
1909         asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
1910         printk(KERN_INFO "CCNT  =0x%08x\n", val);
1911
1912         for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
1913                 armv7_pmnc_select_counter(cnt);
1914                 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
1915                 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
1916                         cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1917                 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
1918                 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
1919                         cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1920         }
1921 }
1922 #endif
1923
1924 static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1925 {
1926         unsigned long flags;
1927
1928         /*
1929          * Enable counter and interrupt, and set the counter to count
1930          * the event that we're interested in.
1931          */
1932         spin_lock_irqsave(&pmu_lock, flags);
1933
1934         /*
1935          * Disable counter
1936          */
1937         armv7_pmnc_disable_counter(idx);
1938
1939         /*
1940          * Set event (if destined for PMNx counters)
1941          * We don't need to set the event if it's a cycle count
1942          */
1943         if (idx != ARMV7_CYCLE_COUNTER)
1944                 armv7_pmnc_write_evtsel(idx, hwc->config_base);
1945
1946         /*
1947          * Enable interrupt for this counter
1948          */
1949         armv7_pmnc_enable_intens(idx);
1950
1951         /*
1952          * Enable counter
1953          */
1954         armv7_pmnc_enable_counter(idx);
1955
1956         spin_unlock_irqrestore(&pmu_lock, flags);
1957 }
1958
1959 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1960 {
1961         unsigned long flags;
1962
1963         /*
1964          * Disable counter and interrupt
1965          */
1966         spin_lock_irqsave(&pmu_lock, flags);
1967
1968         /*
1969          * Disable counter
1970          */
1971         armv7_pmnc_disable_counter(idx);
1972
1973         /*
1974          * Disable interrupt for this counter
1975          */
1976         armv7_pmnc_disable_intens(idx);
1977
1978         spin_unlock_irqrestore(&pmu_lock, flags);
1979 }
1980
1981 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
1982 {
1983         unsigned long pmnc;
1984         struct perf_sample_data data;
1985         struct cpu_hw_events *cpuc;
1986         struct pt_regs *regs;
1987         int idx;
1988
1989         /*
1990          * Get and reset the IRQ flags
1991          */
1992         pmnc = armv7_pmnc_getreset_flags();
1993
1994         /*
1995          * Did an overflow occur?
1996          */
1997         if (!armv7_pmnc_has_overflowed(pmnc))
1998                 return IRQ_NONE;
1999
2000         /*
2001          * Handle the counter(s) overflow(s)
2002          */
2003         regs = get_irq_regs();
2004
2005         perf_sample_data_init(&data, 0);
2006
2007         cpuc = &__get_cpu_var(cpu_hw_events);
2008         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2009                 struct perf_event *event = cpuc->events[idx];
2010                 struct hw_perf_event *hwc;
2011
2012                 if (!test_bit(idx, cpuc->active_mask))
2013                         continue;
2014
2015                 /*
2016                  * We have a single interrupt for all counters. Check that
2017                  * each counter has overflowed before we process it.
2018                  */
2019                 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2020                         continue;
2021
2022                 hwc = &event->hw;
2023                 armpmu_event_update(event, hwc, idx);
2024                 data.period = event->hw.last_period;
2025                 if (!armpmu_event_set_period(event, hwc, idx))
2026                         continue;
2027
2028                 if (perf_event_overflow(event, 0, &data, regs))
2029                         armpmu->disable(hwc, idx);
2030         }
2031
2032         /*
2033          * Handle the pending perf events.
2034          *
2035          * Note: this call *must* be run with interrupts enabled. For
2036          * platforms that can have the PMU interrupts raised as a PMI, this
2037          * will not work.
2038          */
2039         perf_event_do_pending();
2040
2041         return IRQ_HANDLED;
2042 }
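/*
 * Illustrative note, not part of this file: the overflow path above is what
 * services a sampling event, e.g. one opened from userspace with
 *
 *	struct perf_event_attr attr = {
 *		.type          = PERF_TYPE_HARDWARE,
 *		.size          = sizeof(attr),
 *		.config        = PERF_COUNT_HW_CPU_CYCLES,
 *		.sample_period = 100000,
 *	};
 *
 * (the period value is arbitrary). After each overflow interrupt,
 * armpmu_event_set_period() re-arms the counter so that it wraps again one
 * sample_period from now, and perf_event_overflow() records the sample.
 */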
2043
2044 static void armv7pmu_start(void)
2045 {
2046         unsigned long flags;
2047
2048         spin_lock_irqsave(&pmu_lock, flags);
2049         /* Enable all counters */
2050         armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2051         spin_unlock_irqrestore(&pmu_lock, flags);
2052 }
2053
2054 static void armv7pmu_stop(void)
2055 {
2056         unsigned long flags;
2057
2058         spin_lock_irqsave(&pmu_lock, flags);
2059         /* Disable all counters */
2060         armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2061         spin_unlock_irqrestore(&pmu_lock, flags);
2062 }
2063
2064 static inline int armv7_a8_pmu_event_map(int config)
2065 {
2066         int mapping = armv7_a8_perf_map[config];
2067         if (HW_OP_UNSUPPORTED == mapping)
2068                 mapping = -EOPNOTSUPP;
2069         return mapping;
2070 }
2071
2072 static inline int armv7_a9_pmu_event_map(int config)
2073 {
2074         int mapping = armv7_a9_perf_map[config];
2075         if (HW_OP_UNSUPPORTED == mapping)
2076                 mapping = -EOPNOTSUPP;
2077         return mapping;
2078 }
2079
2080 static u64 armv7pmu_raw_event(u64 config)
2081 {
2082         return config & 0xff;
2083 }
2084
2085 static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2086                                   struct hw_perf_event *event)
2087 {
2088         int idx;
2089
2090         /* Always place a cycle counter into the cycle counter. */
2091         if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2092                 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2093                         return -EAGAIN;
2094
2095                 return ARMV7_CYCLE_COUNTER;
2096         } else {
2097                 /*
2098                  * For anything other than a cycle counter, try and use
2099                  * the events counters
2100                  */
2101                 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2102                         if (!test_and_set_bit(idx, cpuc->used_mask))
2103                                 return idx;
2104                 }
2105
2106                 /* The counters are all in use. */
2107                 return -EAGAIN;
2108         }
2109 }
2110
2111 static struct arm_pmu armv7pmu = {
2112         .handle_irq             = armv7pmu_handle_irq,
2113         .enable                 = armv7pmu_enable_event,
2114         .disable                = armv7pmu_disable_event,
2115         .raw_event              = armv7pmu_raw_event,
2116         .read_counter           = armv7pmu_read_counter,
2117         .write_counter          = armv7pmu_write_counter,
2118         .get_event_idx          = armv7pmu_get_event_idx,
2119         .start                  = armv7pmu_start,
2120         .stop                   = armv7pmu_stop,
2121         .max_period             = (1LLU << 32) - 1,
2122 };
2123
2124 static u32 __init armv7_reset_read_pmnc(void)
2125 {
2126         u32 nb_cnt;
2127
2128         /* Initialize & Reset PMNC: C and P bits */
2129         armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2130
2131         /* Read the nb of CNTx counters supported from PMNC */
2132         nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2133
2134         /* Add the CPU cycles counter and return */
2135         return nb_cnt + 1;
2136 }
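/*
 * Worked example (the N value is assumed for illustration): if PMNC[15:11]
 * reads back as 4, nb_cnt == 4 and this returns 5 -- four CNTx event
 * counters plus the cycle counter -- which is the value stored into
 * armv7pmu.num_events and perf_max_events by init_hw_perf_events() below.
 */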
2137
2138 /*
2139  * ARMv5 [xscale] Performance counter handling code.
2140  *
2141  * Based on xscale OProfile code.
2142  *
2143  * There are two variants of the xscale PMU that we support:
2144  *      - xscale1pmu: 2 event counters and a cycle counter
2145  *      - xscale2pmu: 4 event counters and a cycle counter
2146  * The two variants share event definitions, but have different
2147  * PMU structures.
2148  */
2149
2150 enum xscale_perf_types {
2151         XSCALE_PERFCTR_ICACHE_MISS              = 0x00,
2152         XSCALE_PERFCTR_ICACHE_NO_DELIVER        = 0x01,
2153         XSCALE_PERFCTR_DATA_STALL               = 0x02,
2154         XSCALE_PERFCTR_ITLB_MISS                = 0x03,
2155         XSCALE_PERFCTR_DTLB_MISS                = 0x04,
2156         XSCALE_PERFCTR_BRANCH                   = 0x05,
2157         XSCALE_PERFCTR_BRANCH_MISS              = 0x06,
2158         XSCALE_PERFCTR_INSTRUCTION              = 0x07,
2159         XSCALE_PERFCTR_DCACHE_FULL_STALL        = 0x08,
2160         XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
2161         XSCALE_PERFCTR_DCACHE_ACCESS            = 0x0A,
2162         XSCALE_PERFCTR_DCACHE_MISS              = 0x0B,
2163         XSCALE_PERFCTR_DCACHE_WRITE_BACK        = 0x0C,
2164         XSCALE_PERFCTR_PC_CHANGED               = 0x0D,
2165         XSCALE_PERFCTR_BCU_REQUEST              = 0x10,
2166         XSCALE_PERFCTR_BCU_FULL                 = 0x11,
2167         XSCALE_PERFCTR_BCU_DRAIN                = 0x12,
2168         XSCALE_PERFCTR_BCU_ECC_NO_ELOG          = 0x14,
2169         XSCALE_PERFCTR_BCU_1_BIT_ERR            = 0x15,
2170         XSCALE_PERFCTR_RMW                      = 0x16,
2171         /* XSCALE_PERFCTR_CCNT is not hardware defined */
2172         XSCALE_PERFCTR_CCNT                     = 0xFE,
2173         XSCALE_PERFCTR_UNUSED                   = 0xFF,
2174 };
2175
2176 enum xscale_counters {
2177         XSCALE_CYCLE_COUNTER    = 1,
2178         XSCALE_COUNTER0,
2179         XSCALE_COUNTER1,
2180         XSCALE_COUNTER2,
2181         XSCALE_COUNTER3,
2182 };
2183
2184 static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2185         [PERF_COUNT_HW_CPU_CYCLES]          = XSCALE_PERFCTR_CCNT,
2186         [PERF_COUNT_HW_INSTRUCTIONS]        = XSCALE_PERFCTR_INSTRUCTION,
2187         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
2188         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
2189         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2190         [PERF_COUNT_HW_BRANCH_MISSES]       = XSCALE_PERFCTR_BRANCH_MISS,
2191         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
2192 };
2193
2194 static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2195                                            [PERF_COUNT_HW_CACHE_OP_MAX]
2196                                            [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2197         [C(L1D)] = {
2198                 [C(OP_READ)] = {
2199                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2200                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2201                 },
2202                 [C(OP_WRITE)] = {
2203                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2204                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2205                 },
2206                 [C(OP_PREFETCH)] = {
2207                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2208                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2209                 },
2210         },
2211         [C(L1I)] = {
2212                 [C(OP_READ)] = {
2213                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2214                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2215                 },
2216                 [C(OP_WRITE)] = {
2217                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2218                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2219                 },
2220                 [C(OP_PREFETCH)] = {
2221                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2222                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2223                 },
2224         },
2225         [C(LL)] = {
2226                 [C(OP_READ)] = {
2227                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2228                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2229                 },
2230                 [C(OP_WRITE)] = {
2231                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2232                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2233                 },
2234                 [C(OP_PREFETCH)] = {
2235                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2236                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2237                 },
2238         },
2239         [C(DTLB)] = {
2240                 [C(OP_READ)] = {
2241                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2242                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2243                 },
2244                 [C(OP_WRITE)] = {
2245                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2246                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2247                 },
2248                 [C(OP_PREFETCH)] = {
2249                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2250                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2251                 },
2252         },
2253         [C(ITLB)] = {
2254                 [C(OP_READ)] = {
2255                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2256                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2257                 },
2258                 [C(OP_WRITE)] = {
2259                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2260                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2261                 },
2262                 [C(OP_PREFETCH)] = {
2263                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2264                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2265                 },
2266         },
2267         [C(BPU)] = {
2268                 [C(OP_READ)] = {
2269                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2270                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2271                 },
2272                 [C(OP_WRITE)] = {
2273                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2274                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2275                 },
2276                 [C(OP_PREFETCH)] = {
2277                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2278                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2279                 },
2280         },
2281 };
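/*
 * Illustrative note on how this table is indexed (generic perf ABI, not
 * specific to this driver): a PERF_TYPE_HW_CACHE event encodes its config as
 *
 *	config = PERF_COUNT_HW_CACHE_L1D |
 *		 (PERF_COUNT_HW_CACHE_OP_READ      <<  8) |
 *		 (PERF_COUNT_HW_CACHE_RESULT_MISS  << 16);
 *
 * which the common code splits into the three indices above and, for xscale,
 * resolves to XSCALE_PERFCTR_DCACHE_MISS. Entries marked CACHE_OP_UNSUPPORTED
 * cause the corresponding event to be rejected at initialisation time.
 */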
2282
2283 #define XSCALE_PMU_ENABLE       0x001
2284 #define XSCALE_PMN_RESET        0x002
2285 #define XSCALE_CCNT_RESET       0x004
2286 #define XSCALE_PMU_RESET        (XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
2287 #define XSCALE_PMU_CNT64        0x008
2288
2289 static inline int
2290 xscalepmu_event_map(int config)
2291 {
2292         int mapping = xscale_perf_map[config];
2293         if (HW_OP_UNSUPPORTED == mapping)
2294                 mapping = -EOPNOTSUPP;
2295         return mapping;
2296 }
2297
2298 static u64
2299 xscalepmu_raw_event(u64 config)
2300 {
2301         return config & 0xff;
2302 }
2303
2304 #define XSCALE1_OVERFLOWED_MASK 0x700
2305 #define XSCALE1_CCOUNT_OVERFLOW 0x400
2306 #define XSCALE1_COUNT0_OVERFLOW 0x100
2307 #define XSCALE1_COUNT1_OVERFLOW 0x200
2308 #define XSCALE1_CCOUNT_INT_EN   0x040
2309 #define XSCALE1_COUNT0_INT_EN   0x010
2310 #define XSCALE1_COUNT1_INT_EN   0x020
2311 #define XSCALE1_COUNT0_EVT_SHFT 12
2312 #define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
2313 #define XSCALE1_COUNT1_EVT_SHFT 20
2314 #define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
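/*
 * Worked example (illustrative): programming counter 0 to count
 * XSCALE_PERFCTR_INSTRUCTION (0x07) with its overflow interrupt enabled means
 * xscale1pmu_enable_event() ORs
 *
 *	(0x07 << XSCALE1_COUNT0_EVT_SHFT) | XSCALE1_COUNT0_INT_EN == 0x7010
 *
 * into the packed PMNC, on top of XSCALE_PMU_ENABLE (0x001) which is set
 * separately by xscale1pmu_start().
 */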
2315
2316 static inline u32
2317 xscale1pmu_read_pmnc(void)
2318 {
2319         u32 val;
2320         asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2321         return val;
2322 }
2323
2324 static inline void
2325 xscale1pmu_write_pmnc(u32 val)
2326 {
2327         /* upper 4 bits and bits 7, 11 are write-as-0 */
2328         val &= 0xffff77f;
2329         asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2330 }
2331
2332 static inline int
2333 xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2334                                         enum xscale_counters counter)
2335 {
2336         int ret = 0;
2337
2338         switch (counter) {
2339         case XSCALE_CYCLE_COUNTER:
2340                 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2341                 break;
2342         case XSCALE_COUNTER0:
2343                 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2344                 break;
2345         case XSCALE_COUNTER1:
2346                 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2347                 break;
2348         default:
2349                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2350         }
2351
2352         return ret;
2353 }
2354
2355 static irqreturn_t
2356 xscale1pmu_handle_irq(int irq_num, void *dev)
2357 {
2358         unsigned long pmnc;
2359         struct perf_sample_data data;
2360         struct cpu_hw_events *cpuc;
2361         struct pt_regs *regs;
2362         int idx;
2363
2364         /*
2365          * NOTE: there's an A stepping erratum that states if an overflow
2366          *       bit already exists and another occurs, the previous
2367          *       Overflow bit gets cleared. There's no workaround.
2368          *       Fixed in B stepping or later.
2369          */
2370         pmnc = xscale1pmu_read_pmnc();
2371
2372         /*
2373          * Write the value back to clear the overflow flags. Overflow
2374          * flags remain in pmnc for use below. We also disable the PMU
2375          * while we process the interrupt.
2376          */
2377         xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2378
2379         if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2380                 return IRQ_NONE;
2381
2382         regs = get_irq_regs();
2383
2384         perf_sample_data_init(&data, 0);
2385
2386         cpuc = &__get_cpu_var(cpu_hw_events);
2387         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2388                 struct perf_event *event = cpuc->events[idx];
2389                 struct hw_perf_event *hwc;
2390
2391                 if (!test_bit(idx, cpuc->active_mask))
2392                         continue;
2393
2394                 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2395                         continue;
2396
2397                 hwc = &event->hw;
2398                 armpmu_event_update(event, hwc, idx);
2399                 data.period = event->hw.last_period;
2400                 if (!armpmu_event_set_period(event, hwc, idx))
2401                         continue;
2402
2403                 if (perf_event_overflow(event, 0, &data, regs))
2404                         armpmu->disable(hwc, idx);
2405         }
2406
2407         perf_event_do_pending();
2408
2409         /*
2410          * Re-enable the PMU.
2411          */
2412         pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2413         xscale1pmu_write_pmnc(pmnc);
2414
2415         return IRQ_HANDLED;
2416 }
2417
2418 static void
2419 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2420 {
2421         unsigned long val, mask, evt, flags;
2422
2423         switch (idx) {
2424         case XSCALE_CYCLE_COUNTER:
2425                 mask = 0;
2426                 evt = XSCALE1_CCOUNT_INT_EN;
2427                 break;
2428         case XSCALE_COUNTER0:
2429                 mask = XSCALE1_COUNT0_EVT_MASK;
2430                 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2431                         XSCALE1_COUNT0_INT_EN;
2432                 break;
2433         case XSCALE_COUNTER1:
2434                 mask = XSCALE1_COUNT1_EVT_MASK;
2435                 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2436                         XSCALE1_COUNT1_INT_EN;
2437                 break;
2438         default:
2439                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2440                 return;
2441         }
2442
2443         spin_lock_irqsave(&pmu_lock, flags);
2444         val = xscale1pmu_read_pmnc();
2445         val &= ~mask;
2446         val |= evt;
2447         xscale1pmu_write_pmnc(val);
2448         spin_unlock_irqrestore(&pmu_lock, flags);
2449 }
2450
2451 static void
2452 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2453 {
2454         unsigned long val, mask, evt, flags;
2455
2456         switch (idx) {
2457         case XSCALE_CYCLE_COUNTER:
2458                 mask = XSCALE1_CCOUNT_INT_EN;
2459                 evt = 0;
2460                 break;
2461         case XSCALE_COUNTER0:
2462                 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2463                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2464                 break;
2465         case XSCALE_COUNTER1:
2466                 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2467                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2468                 break;
2469         default:
2470                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2471                 return;
2472         }
2473
2474         spin_lock_irqsave(&pmu_lock, flags);
2475         val = xscale1pmu_read_pmnc();
2476         val &= ~mask;
2477         val |= evt;
2478         xscale1pmu_write_pmnc(val);
2479         spin_unlock_irqrestore(&pmu_lock, flags);
2480 }
2481
2482 static int
2483 xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2484                         struct hw_perf_event *event)
2485 {
2486         if (XSCALE_PERFCTR_CCNT == event->config_base) {
2487                 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2488                         return -EAGAIN;
2489
2490                 return XSCALE_CYCLE_COUNTER;
2491         } else {
2492                 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2493                         return XSCALE_COUNTER1;
2494                 }
2495
2496                 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2497                         return XSCALE_COUNTER0;
2498                 }
2499
2500                 return -EAGAIN;
2501         }
2502 }
2503
2504 static void
2505 xscale1pmu_start(void)
2506 {
2507         unsigned long flags, val;
2508
2509         spin_lock_irqsave(&pmu_lock, flags);
2510         val = xscale1pmu_read_pmnc();
2511         val |= XSCALE_PMU_ENABLE;
2512         xscale1pmu_write_pmnc(val);
2513         spin_unlock_irqrestore(&pmu_lock, flags);
2514 }
2515
2516 static void
2517 xscale1pmu_stop(void)
2518 {
2519         unsigned long flags, val;
2520
2521         spin_lock_irqsave(&pmu_lock, flags);
2522         val = xscale1pmu_read_pmnc();
2523         val &= ~XSCALE_PMU_ENABLE;
2524         xscale1pmu_write_pmnc(val);
2525         spin_unlock_irqrestore(&pmu_lock, flags);
2526 }
2527
2528 static inline u32
2529 xscale1pmu_read_counter(int counter)
2530 {
2531         u32 val = 0;
2532
2533         switch (counter) {
2534         case XSCALE_CYCLE_COUNTER:
2535                 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2536                 break;
2537         case XSCALE_COUNTER0:
2538                 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2539                 break;
2540         case XSCALE_COUNTER1:
2541                 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2542                 break;
2543         }
2544
2545         return val;
2546 }
2547
2548 static inline void
2549 xscale1pmu_write_counter(int counter, u32 val)
2550 {
2551         switch (counter) {
2552         case XSCALE_CYCLE_COUNTER:
2553                 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2554                 break;
2555         case XSCALE_COUNTER0:
2556                 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2557                 break;
2558         case XSCALE_COUNTER1:
2559                 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2560                 break;
2561         }
2562 }
2563
2564 static const struct arm_pmu xscale1pmu = {
2565         .id             = ARM_PERF_PMU_ID_XSCALE1,
2566         .handle_irq     = xscale1pmu_handle_irq,
2567         .enable         = xscale1pmu_enable_event,
2568         .disable        = xscale1pmu_disable_event,
2569         .event_map      = xscalepmu_event_map,
2570         .raw_event      = xscalepmu_raw_event,
2571         .read_counter   = xscale1pmu_read_counter,
2572         .write_counter  = xscale1pmu_write_counter,
2573         .get_event_idx  = xscale1pmu_get_event_idx,
2574         .start          = xscale1pmu_start,
2575         .stop           = xscale1pmu_stop,
2576         .num_events     = 3,
2577         .max_period     = (1LLU << 32) - 1,
2578 };
2579
2580 #define XSCALE2_OVERFLOWED_MASK 0x01f
2581 #define XSCALE2_CCOUNT_OVERFLOW 0x001
2582 #define XSCALE2_COUNT0_OVERFLOW 0x002
2583 #define XSCALE2_COUNT1_OVERFLOW 0x004
2584 #define XSCALE2_COUNT2_OVERFLOW 0x008
2585 #define XSCALE2_COUNT3_OVERFLOW 0x010
2586 #define XSCALE2_CCOUNT_INT_EN   0x001
2587 #define XSCALE2_COUNT0_INT_EN   0x002
2588 #define XSCALE2_COUNT1_INT_EN   0x004
2589 #define XSCALE2_COUNT2_INT_EN   0x008
2590 #define XSCALE2_COUNT3_INT_EN   0x010
2591 #define XSCALE2_COUNT0_EVT_SHFT 0
2592 #define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
2593 #define XSCALE2_COUNT1_EVT_SHFT 8
2594 #define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
2595 #define XSCALE2_COUNT2_EVT_SHFT 16
2596 #define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
2597 #define XSCALE2_COUNT3_EVT_SHFT 24
2598 #define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
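/*
 * Worked example (illustrative): unlike xscale1, the xscale2 PMU splits event
 * selection, interrupt enables and overflow flags into separate registers. To
 * route counter 2 to XSCALE_PERFCTR_DCACHE_MISS (0x0B),
 * xscale2pmu_enable_event() does the equivalent of
 *
 *	evtsel = (evtsel & ~XSCALE2_COUNT2_EVT_MASK) |
 *		 (0x0B << XSCALE2_COUNT2_EVT_SHFT);
 *	ien   |= XSCALE2_COUNT2_INT_EN;
 *
 * and writes both back under pmu_lock.
 */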
2599
2600 static inline u32
2601 xscale2pmu_read_pmnc(void)
2602 {
2603         u32 val;
2604         asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2605         /* bits 1-2 and 4-23 are read-unpredictable */
2606         return val & 0xff000009;
2607 }
2608
2609 static inline void
2610 xscale2pmu_write_pmnc(u32 val)
2611 {
2612         /* bits 4-23 are write-as-0, 24-31 are write ignored */
2613         val &= 0xf;
2614         asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2615 }
2616
2617 static inline u32
2618 xscale2pmu_read_overflow_flags(void)
2619 {
2620         u32 val;
2621         asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2622         return val;
2623 }
2624
2625 static inline void
2626 xscale2pmu_write_overflow_flags(u32 val)
2627 {
2628         asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2629 }
2630
2631 static inline u32
2632 xscale2pmu_read_event_select(void)
2633 {
2634         u32 val;
2635         asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2636         return val;
2637 }
2638
2639 static inline void
2640 xscale2pmu_write_event_select(u32 val)
2641 {
2642         asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2643 }
2644
2645 static inline u32
2646 xscale2pmu_read_int_enable(void)
2647 {
2648         u32 val;
2649         asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2650         return val;
2651 }
2652
2653 static void
2654 xscale2pmu_write_int_enable(u32 val)
2655 {
2656         asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2657 }
2658
2659 static inline int
2660 xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2661                                         enum xscale_counters counter)
2662 {
2663         int ret = 0;
2664
2665         switch (counter) {
2666         case XSCALE_CYCLE_COUNTER:
2667                 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2668                 break;
2669         case XSCALE_COUNTER0:
2670                 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2671                 break;
2672         case XSCALE_COUNTER1:
2673                 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2674                 break;
2675         case XSCALE_COUNTER2:
2676                 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2677                 break;
2678         case XSCALE_COUNTER3:
2679                 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2680                 break;
2681         default:
2682                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2683         }
2684
2685         return ret;
2686 }
2687
2688 static irqreturn_t
2689 xscale2pmu_handle_irq(int irq_num, void *dev)
2690 {
2691         unsigned long pmnc, of_flags;
2692         struct perf_sample_data data;
2693         struct cpu_hw_events *cpuc;
2694         struct pt_regs *regs;
2695         int idx;
2696
2697         /* Disable the PMU. */
2698         pmnc = xscale2pmu_read_pmnc();
2699         xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2700
2701         /* Check the overflow flag register. */
2702         of_flags = xscale2pmu_read_overflow_flags();
2703         if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2704                 return IRQ_NONE;
2705
2706         /* Clear the overflow bits. */
2707         xscale2pmu_write_overflow_flags(of_flags);
2708
2709         regs = get_irq_regs();
2710
2711         perf_sample_data_init(&data, 0);
2712
2713         cpuc = &__get_cpu_var(cpu_hw_events);
2714         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2715                 struct perf_event *event = cpuc->events[idx];
2716                 struct hw_perf_event *hwc;
2717
2718                 if (!test_bit(idx, cpuc->active_mask))
2719                         continue;
2720
2721                 if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
2722                         continue;
2723
2724                 hwc = &event->hw;
2725                 armpmu_event_update(event, hwc, idx);
2726                 data.period = event->hw.last_period;
2727                 if (!armpmu_event_set_period(event, hwc, idx))
2728                         continue;
2729
2730                 if (perf_event_overflow(event, 0, &data, regs))
2731                         armpmu->disable(hwc, idx);
2732         }
2733
2734         perf_event_do_pending();
2735
2736         /*
2737          * Re-enable the PMU.
2738          */
2739         pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2740         xscale2pmu_write_pmnc(pmnc);
2741
2742         return IRQ_HANDLED;
2743 }
2744
2745 static void
2746 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2747 {
2748         unsigned long flags, ien, evtsel;
2749
2750         ien = xscale2pmu_read_int_enable();
2751         evtsel = xscale2pmu_read_event_select();
2752
2753         switch (idx) {
2754         case XSCALE_CYCLE_COUNTER:
2755                 ien |= XSCALE2_CCOUNT_INT_EN;
2756                 break;
2757         case XSCALE_COUNTER0:
2758                 ien |= XSCALE2_COUNT0_INT_EN;
2759                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2760                 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2761                 break;
2762         case XSCALE_COUNTER1:
2763                 ien |= XSCALE2_COUNT1_INT_EN;
2764                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2765                 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2766                 break;
2767         case XSCALE_COUNTER2:
2768                 ien |= XSCALE2_COUNT2_INT_EN;
2769                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2770                 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2771                 break;
2772         case XSCALE_COUNTER3:
2773                 ien |= XSCALE2_COUNT3_INT_EN;
2774                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2775                 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2776                 break;
2777         default:
2778                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2779                 return;
2780         }
2781
2782         spin_lock_irqsave(&pmu_lock, flags);
2783         xscale2pmu_write_event_select(evtsel);
2784         xscale2pmu_write_int_enable(ien);
2785         spin_unlock_irqrestore(&pmu_lock, flags);
2786 }
2787
2788 static void
2789 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2790 {
2791         unsigned long flags, ien, evtsel;
2792
2793         ien = xscale2pmu_read_int_enable();
2794         evtsel = xscale2pmu_read_event_select();
2795
2796         switch (idx) {
2797         case XSCALE_CYCLE_COUNTER:
2798                 ien &= ~XSCALE2_CCOUNT_INT_EN;
2799                 break;
2800         case XSCALE_COUNTER0:
2801                 ien &= ~XSCALE2_COUNT0_INT_EN;
2802                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2803                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2804                 break;
2805         case XSCALE_COUNTER1:
2806                 ien &= ~XSCALE2_COUNT1_INT_EN;
2807                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2808                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2809                 break;
2810         case XSCALE_COUNTER2:
2811                 ien &= ~XSCALE2_COUNT2_INT_EN;
2812                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2813                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2814                 break;
2815         case XSCALE_COUNTER3:
2816                 ien &= ~XSCALE2_COUNT3_INT_EN;
2817                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2818                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2819                 break;
2820         default:
2821                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2822                 return;
2823         }
2824
2825         spin_lock_irqsave(&pmu_lock, flags);
2826         xscale2pmu_write_event_select(evtsel);
2827         xscale2pmu_write_int_enable(ien);
2828         spin_unlock_irqrestore(&pmu_lock, flags);
2829 }
2830
2831 static int
2832 xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2833                         struct hw_perf_event *event)
2834 {
2835         int idx = xscale1pmu_get_event_idx(cpuc, event);
2836         if (idx >= 0)
2837                 goto out;
2838
2839         if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2840                 idx = XSCALE_COUNTER3;
2841         else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2842                 idx = XSCALE_COUNTER2;
2843 out:
2844         return idx;
2845 }
2846
2847 static void
2848 xscale2pmu_start(void)
2849 {
2850         unsigned long flags, val;
2851
2852         spin_lock_irqsave(&pmu_lock, flags);
2853         val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2854         val |= XSCALE_PMU_ENABLE;
2855         xscale2pmu_write_pmnc(val);
2856         spin_unlock_irqrestore(&pmu_lock, flags);
2857 }
2858
2859 static void
2860 xscale2pmu_stop(void)
2861 {
2862         unsigned long flags, val;
2863
2864         spin_lock_irqsave(&pmu_lock, flags);
2865         val = xscale2pmu_read_pmnc();
2866         val &= ~XSCALE_PMU_ENABLE;
2867         xscale2pmu_write_pmnc(val);
2868         spin_unlock_irqrestore(&pmu_lock, flags);
2869 }
2870
2871 static inline u32
2872 xscale2pmu_read_counter(int counter)
2873 {
2874         u32 val = 0;
2875
2876         switch (counter) {
2877         case XSCALE_CYCLE_COUNTER:
2878                 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2879                 break;
2880         case XSCALE_COUNTER0:
2881                 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2882                 break;
2883         case XSCALE_COUNTER1:
2884                 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2885                 break;
2886         case XSCALE_COUNTER2:
2887                 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2888                 break;
2889         case XSCALE_COUNTER3:
2890                 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2891                 break;
2892         }
2893
2894         return val;
2895 }
2896
2897 static inline void
2898 xscale2pmu_write_counter(int counter, u32 val)
2899 {
2900         switch (counter) {
2901         case XSCALE_CYCLE_COUNTER:
2902                 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2903                 break;
2904         case XSCALE_COUNTER0:
2905                 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2906                 break;
2907         case XSCALE_COUNTER1:
2908                 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2909                 break;
2910         case XSCALE_COUNTER2:
2911                 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2912                 break;
2913         case XSCALE_COUNTER3:
2914                 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2915                 break;
2916         }
2917 }
2918
2919 static const struct arm_pmu xscale2pmu = {
2920         .id             = ARM_PERF_PMU_ID_XSCALE2,
2921         .handle_irq     = xscale2pmu_handle_irq,
2922         .enable         = xscale2pmu_enable_event,
2923         .disable        = xscale2pmu_disable_event,
2924         .event_map      = xscalepmu_event_map,
2925         .raw_event      = xscalepmu_raw_event,
2926         .read_counter   = xscale2pmu_read_counter,
2927         .write_counter  = xscale2pmu_write_counter,
2928         .get_event_idx  = xscale2pmu_get_event_idx,
2929         .start          = xscale2pmu_start,
2930         .stop           = xscale2pmu_stop,
2931         .num_events     = 5,
2932         .max_period     = (1LLU << 32) - 1,
2933 };
2934
2935 static int __init
2936 init_hw_perf_events(void)
2937 {
2938         unsigned long cpuid = read_cpuid_id();
2939         unsigned long implementor = (cpuid & 0xFF000000) >> 24;
2940         unsigned long part_number = (cpuid & 0xFFF0);
2941
2942         /* ARM Ltd CPUs. */
2943         if (0x41 == implementor) {
2944                 switch (part_number) {
2945                 case 0xB360:    /* ARM1136 */
2946                 case 0xB560:    /* ARM1156 */
2947                 case 0xB760:    /* ARM1176 */
2948                         armpmu = &armv6pmu;
2949                         memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2950                                         sizeof(armv6_perf_cache_map));
2951                         perf_max_events = armv6pmu.num_events;
2952                         break;
2953                 case 0xB020:    /* ARM11mpcore */
2954                         armpmu = &armv6mpcore_pmu;
2955                         memcpy(armpmu_perf_cache_map,
2956                                armv6mpcore_perf_cache_map,
2957                                sizeof(armv6mpcore_perf_cache_map));
2958                         perf_max_events = armv6mpcore_pmu.num_events;
2959                         break;
2960                 case 0xC080:    /* Cortex-A8 */
2961                         armv7pmu.id = ARM_PERF_PMU_ID_CA8;
2962                         memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
2963                                 sizeof(armv7_a8_perf_cache_map));
2964                         armv7pmu.event_map = armv7_a8_pmu_event_map;
2965                         armpmu = &armv7pmu;
2966
2967                         /* Reset PMNC and read the number of CNTx
2968                            counters supported. */
2969                         armv7pmu.num_events = armv7_reset_read_pmnc();
2970                         perf_max_events = armv7pmu.num_events;
2971                         break;
2972                 case 0xC090:    /* Cortex-A9 */
2973                         armv7pmu.id = ARM_PERF_PMU_ID_CA9;
2974                         memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
2975                                 sizeof(armv7_a9_perf_cache_map));
2976                         armv7pmu.event_map = armv7_a9_pmu_event_map;
2977                         armpmu = &armv7pmu;
2978
2979                         /* Reset PMNC and read the number of CNTx
2980                            counters supported. */
2981                         armv7pmu.num_events = armv7_reset_read_pmnc();
2982                         perf_max_events = armv7pmu.num_events;
2983                         break;
2984                 }
2985         /* Intel CPUs [xscale]. */
2986         } else if (0x69 == implementor) {
2987                 part_number = (cpuid >> 13) & 0x7;
2988                 switch (part_number) {
2989                 case 1:
2990                         armpmu = &xscale1pmu;
2991                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2992                                         sizeof(xscale_perf_cache_map));
2993                         perf_max_events = xscale1pmu.num_events;
2994                         break;
2995                 case 2:
2996                         armpmu = &xscale2pmu;
2997                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2998                                         sizeof(xscale_perf_cache_map));
2999                         perf_max_events = xscale2pmu.num_events;
3000                         break;
3001                 }
3002         }
3003
3004         if (armpmu) {
3005                 pr_info("enabled with %s PMU driver, %d counters available\n",
3006                                 arm_pmu_names[armpmu->id], armpmu->num_events);
3007         } else {
3008                 pr_info("no hardware support available\n");
3009                 perf_max_events = -1;
3010         }
3011
3012         perf_pmu_register(&pmu);
3013
3014         return 0;
3015 }
3016 arch_initcall(init_hw_perf_events);
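
init_hw_perf_events() picks a backend purely from the CPU ID register: bits [31:24] hold the implementor code (0x41 for ARM Ltd, 0x69 for Intel), ARM parts keep their part number in bits [15:4], and the XScale parts encode their generation in bits [15:13]. A user-space sketch of that decoding; the sample cpuid value below is made up for illustration:

#include <stdio.h>

int main(void)
{
        unsigned long cpuid = 0x413FC090;       /* example value: ARM Ltd implementor */
        unsigned long implementor = (cpuid & 0xFF000000) >> 24;
        unsigned long part_number = cpuid & 0xFFF0;     /* kept unshifted, as the driver does */

        printf("implementor = %#lx\n", implementor);    /* 0x41 -> ARM Ltd */
        printf("part_number = %#lx\n", part_number);    /* 0xC090 -> the Cortex-A9 case above */

        if (implementor == 0x69)        /* Intel parts use the narrower [15:13] field */
                part_number = (cpuid >> 13) & 0x7;

        return 0;
}
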
3017
3018 /*
3019  * Callchain handling code.
3020  */
3021
3022 /*
3023  * The registers we're interested in are at the end of the variable
3024  * length saved register structure. The fp points at the end of this
3025  * structure so the address of this struct is:
3026  * (struct frame_tail *)(xxx->fp)-1
3027  *
3028  * This code has been adapted from the ARM OProfile support.
3029  */
3030 struct frame_tail {
3031         struct frame_tail   *fp;
3032         unsigned long       sp;
3033         unsigned long       lr;
3034 } __attribute__((packed));
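
To make the comment above concrete: the saved frame pointer refers to the word just past the {fp, sp, lr} triple, so stepping back by one struct frame_tail recovers the triple's address. A stand-alone sketch of that pointer arithmetic (all values are fabricated):

#include <stdio.h>

struct frame_tail {
        struct frame_tail *fp;
        unsigned long     sp;
        unsigned long     lr;
} __attribute__((packed));

int main(void)
{
        /* Pretend this is the saved {fp, sp, lr} triple at the top of a frame. */
        struct frame_tail saved = { 0, 0xbeef0000, 0x80001234 };

        /* A frame pointer that points just past the saved triple... */
        void *fp = (char *)&saved + sizeof(saved);

        /* ...so stepping back one struct frame_tail lands on it. */
        struct frame_tail *tail = (struct frame_tail *)fp - 1;

        printf("lr = %#lx\n", tail->lr);        /* 0x80001234 */
        return 0;
}
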
3035
3036 /*
3037  * Get the return address for a single stackframe and return a pointer to the
3038  * next frame tail.
3039  */
3040 static struct frame_tail *
3041 user_backtrace(struct frame_tail *tail,
3042                struct perf_callchain_entry *entry)
3043 {
3044         struct frame_tail buftail;
3045
3046         /* Check that we can read a struct frame_tail from this address */
3047         if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
3048                 return NULL;
3049         if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3050                 return NULL;
3051
3052         perf_callchain_store(entry, buftail.lr);
3053
3054         /*
3055          * Frame pointers should strictly progress back up the stack
3056          * (towards higher addresses).
3057          */
3058         if (tail >= buftail.fp)
3059                 return NULL;
3060
3061         return buftail.fp - 1;
3062 }
3063
3064 void
3065 perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3066 {
3067         struct frame_tail *tail;
3068
3069
3070         tail = (struct frame_tail *)regs->ARM_fp - 1;
3071
3072         while (tail && !((unsigned long)tail & 0x3))
3073                 tail = user_backtrace(tail, entry);
3074 }
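
Putting the two routines together: perf_callchain_user() seeds tail from the user-mode fp and keeps calling user_backtrace() until it returns NULL or yields a misaligned pointer. The sketch below replays that loop over two hand-built frames in ordinary user space, with memcpy() standing in for the access_ok()/__copy_from_user_inatomic() pair and printf() for perf_callchain_store():

#include <stdio.h>
#include <string.h>

struct frame_tail {
        struct frame_tail *fp;
        unsigned long     sp;
        unsigned long     lr;
} __attribute__((packed));

static struct frame_tail *walk_one(struct frame_tail *tail)
{
        struct frame_tail buftail;

        memcpy(&buftail, tail, sizeof(buftail));
        printf("return address: %#lx\n", buftail.lr);

        /* Frame pointers must move towards higher addresses. */
        if (tail >= buftail.fp)
                return NULL;
        return buftail.fp - 1;
}

int main(void)
{
        struct frame_tail frames[2];

        /* frames[1] is the older frame; its back-link deliberately ends the walk. */
        frames[1].fp = &frames[0];
        frames[1].sp = 0;
        frames[1].lr = 0x80002000;

        /* frames[0] is the newest frame; its fp points just past frames[1]. */
        frames[0].fp = &frames[1] + 1;
        frames[0].sp = 0;
        frames[0].lr = 0x80001000;

        struct frame_tail *tail = &frames[0];

        while (tail && !((unsigned long)tail & 0x3))
                tail = walk_one(tail);

        return 0;
}
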
3075
3076 /*
3077  * Gets called by walk_stackframe() for every stackframe. This will be called
3078  * whilst unwinding the stackframe and is like a subroutine return so we use
3079  * the PC.
3080  */
3081 static int
3082 callchain_trace(struct stackframe *fr,
3083                 void *data)
3084 {
3085         struct perf_callchain_entry *entry = data;
3086         perf_callchain_store(entry, fr->pc);
3087         return 0;
3088 }
3089
3090 void
3091 perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
3092 {
3093         struct stackframe fr;
3094
3095         fr.fp = regs->ARM_fp;
3096         fr.sp = regs->ARM_sp;
3097         fr.lr = regs->ARM_lr;
3098         fr.pc = regs->ARM_pc;
3099         walk_stackframe(&fr, callchain_trace, entry);
3100 }
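
On the kernel side, perf_callchain_kernel() only seeds a struct stackframe from the trapped registers; walk_stackframe() then calls callchain_trace() once per unwound frame, and the callback stores each PC. The user-space sketch below mirrors that callback-driven shape with a fake unwinder that replays canned PCs (the callchain_store and fake_walk names are invented for illustration):

#include <stdio.h>

struct stackframe {
        unsigned long fp, sp, lr, pc;
};

struct callchain_entry {
        unsigned long ip[16];
        unsigned int  nr;
};

static void callchain_store(struct callchain_entry *entry, unsigned long ip)
{
        if (entry->nr < 16)
                entry->ip[entry->nr++] = ip;
}

/* Mirrors callchain_trace(): record the pc, return 0 to keep walking. */
static int trace(struct stackframe *fr, void *data)
{
        callchain_store(data, fr->pc);
        return 0;
}

/* Fake unwinder: replays a canned list of pcs instead of parsing frames. */
static void fake_walk(struct stackframe *fr,
                      int (*fn)(struct stackframe *, void *), void *data)
{
        static const unsigned long pcs[] = { 0xc0008000, 0xc0108000, 0xc0208000 };
        unsigned int i;

        for (i = 0; i < sizeof(pcs) / sizeof(pcs[0]); i++) {
                fr->pc = pcs[i];
                if (fn(fr, data))
                        break;
        }
}

int main(void)
{
        struct callchain_entry entry = { .nr = 0 };
        struct stackframe fr = { 0 };
        unsigned int i;

        fake_walk(&fr, trace, &entry);
        for (i = 0; i < entry.nr; i++)
                printf("%#lx\n", entry.ip[i]);

        return 0;
}
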