arch/arm/kernel/perf_event.c
1 #undef DEBUG
2
3 /*
4  * ARM performance counter support.
5  *
6  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7  *
8  * ARMv7 support: Jean Pihet <jpihet@mvista.com>
9  * 2010 (c) MontaVista Software, LLC.
10  *
11  * This code is based on the sparc64 perf event code, which is in turn based
12  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
13  * code.
14  */
15 #define pr_fmt(fmt) "hw perfevents: " fmt
16
17 #include <linux/interrupt.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/perf_event.h>
21 #include <linux/platform_device.h>
22 #include <linux/spinlock.h>
23 #include <linux/uaccess.h>
24
25 #include <asm/cputype.h>
26 #include <asm/irq.h>
27 #include <asm/irq_regs.h>
28 #include <asm/pmu.h>
29 #include <asm/stacktrace.h>
30
31 static struct platform_device *pmu_device;
32
33 /*
34  * Hardware lock to serialize accesses to PMU registers. Needed for the
35  * read/modify/write sequences.
36  */
37 DEFINE_SPINLOCK(pmu_lock);
38
39 /*
40  * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
41  * another platform that supports more, we need to increase this to be the
42  * largest of all platforms.
43  *
44  * ARMv7 supports up to 32 events:
45  *  cycle counter CCNT + 31 event counters CNT0..30.
46  *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
47  */
48 #define ARMPMU_MAX_HWEVENTS             33
49
50 /* The events for a given CPU. */
51 struct cpu_hw_events {
52         /*
53          * The events that are active on the CPU for the given index. Index 0
54          * is reserved.
55          */
56         struct perf_event       *events[ARMPMU_MAX_HWEVENTS];
57
58         /*
59          * A 1 bit for an index indicates that the counter is being used for
60          * an event. A 0 means that the counter can be used.
61          */
62         unsigned long           used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
63
64         /*
65          * A 1 bit for an index indicates that the counter is actively being
66          * used.
67          */
68         unsigned long           active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
69 };
70 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
71
72 /* PMU names. */
73 static const char *arm_pmu_names[] = {
74         [ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
75         [ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
76         [ARM_PERF_PMU_ID_V6]      = "v6",
77         [ARM_PERF_PMU_ID_V6MP]    = "v6mpcore",
78         [ARM_PERF_PMU_ID_CA8]     = "ARMv7 Cortex-A8",
79         [ARM_PERF_PMU_ID_CA9]     = "ARMv7 Cortex-A9",
80 };
81
82 struct arm_pmu {
83         enum arm_perf_pmu_ids id;
84         irqreturn_t     (*handle_irq)(int irq_num, void *dev);
85         void            (*enable)(struct hw_perf_event *evt, int idx);
86         void            (*disable)(struct hw_perf_event *evt, int idx);
87         int             (*event_map)(int evt);
88         u64             (*raw_event)(u64);
89         int             (*get_event_idx)(struct cpu_hw_events *cpuc,
90                                          struct hw_perf_event *hwc);
91         u32             (*read_counter)(int idx);
92         void            (*write_counter)(int idx, u32 val);
93         void            (*start)(void);
94         void            (*stop)(void);
95         int             num_events;
96         u64             max_period;
97 };
98
99 /* Set at runtime when we know what CPU type we are. */
100 static const struct arm_pmu *armpmu;
101
102 enum arm_perf_pmu_ids
103 armpmu_get_pmu_id(void)
104 {
105         int id = -ENODEV;
106
107         if (armpmu != NULL)
108                 id = armpmu->id;
109
110         return id;
111 }
112 EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);
113
114 int
115 armpmu_get_max_events(void)
116 {
117         int max_events = 0;
118
119         if (armpmu != NULL)
120                 max_events = armpmu->num_events;
121
122         return max_events;
123 }
124 EXPORT_SYMBOL_GPL(armpmu_get_max_events);
125
126 #define HW_OP_UNSUPPORTED               0xFFFF
127
128 #define C(_x) \
129         PERF_COUNT_HW_CACHE_##_x
130
131 #define CACHE_OP_UNSUPPORTED            0xFFFF
132
133 static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
134                                      [PERF_COUNT_HW_CACHE_OP_MAX]
135                                      [PERF_COUNT_HW_CACHE_RESULT_MAX];
136
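/*
 * Decode a generic cache event config (type | op << 8 | result << 16) into
 * the CPU-specific event number, or return an error if it is out of range
 * or unsupported on this PMU.
 */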
137 static int
138 armpmu_map_cache_event(u64 config)
139 {
140         unsigned int cache_type, cache_op, cache_result, ret;
141
142         cache_type = (config >>  0) & 0xff;
143         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
144                 return -EINVAL;
145
146         cache_op = (config >>  8) & 0xff;
147         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
148                 return -EINVAL;
149
150         cache_result = (config >> 16) & 0xff;
151         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
152                 return -EINVAL;
153
154         ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
155
156         if (ret == CACHE_OP_UNSUPPORTED)
157                 return -ENOENT;
158
159         return ret;
160 }
161
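/*
 * Reprogram the counter for the next sample period: the counter is written
 * with -(period remaining) so that it overflows once the period has
 * elapsed. Returns non-zero if a new period was started.
 */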
162 static int
163 armpmu_event_set_period(struct perf_event *event,
164                         struct hw_perf_event *hwc,
165                         int idx)
166 {
167         s64 left = local64_read(&hwc->period_left);
168         s64 period = hwc->sample_period;
169         int ret = 0;
170
171         if (unlikely(left <= -period)) {
172                 left = period;
173                 local64_set(&hwc->period_left, left);
174                 hwc->last_period = period;
175                 ret = 1;
176         }
177
178         if (unlikely(left <= 0)) {
179                 left += period;
180                 local64_set(&hwc->period_left, left);
181                 hwc->last_period = period;
182                 ret = 1;
183         }
184
185         if (left > (s64)armpmu->max_period)
186                 left = armpmu->max_period;
187
188         local64_set(&hwc->prev_count, (u64)-left);
189
190         armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
191
192         perf_event_update_userpage(event);
193
194         return ret;
195 }
196
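/*
 * Read the hardware counter and fold the delta since the last read (modulo
 * the 32-bit counter width) into the event count and remaining period.
 */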
197 static u64
198 armpmu_event_update(struct perf_event *event,
199                     struct hw_perf_event *hwc,
200                     int idx)
201 {
202         int shift = 64 - 32;
203         s64 prev_raw_count, new_raw_count;
204         u64 delta;
205
206 again:
207         prev_raw_count = local64_read(&hwc->prev_count);
208         new_raw_count = armpmu->read_counter(idx);
209
210         if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
211                              new_raw_count) != prev_raw_count)
212                 goto again;
213
214         delta = (new_raw_count << shift) - (prev_raw_count << shift);
215         delta >>= shift;
216
217         local64_add(delta, &event->count);
218         local64_sub(delta, &hwc->period_left);
219
220         return new_raw_count;
221 }
222
223 static void
224 armpmu_read(struct perf_event *event)
225 {
226         struct hw_perf_event *hwc = &event->hw;
227
228         /* Don't read disabled counters! */
229         if (hwc->idx < 0)
230                 return;
231
232         armpmu_event_update(event, hwc, hwc->idx);
233 }
234
235 static void
236 armpmu_stop(struct perf_event *event, int flags)
237 {
238         struct hw_perf_event *hwc = &event->hw;
239
240         if (!armpmu)
241                 return;
242
243         /*
244          * ARM pmu always has to update the counter, so ignore
245          * PERF_EF_UPDATE, see comments in armpmu_start().
246          */
247         if (!(hwc->state & PERF_HES_STOPPED)) {
248                 armpmu->disable(hwc, hwc->idx);
249                 barrier(); /* why? */
250                 armpmu_event_update(event, hwc, hwc->idx);
251                 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
252         }
253 }
254
255 static void
256 armpmu_start(struct perf_event *event, int flags)
257 {
258         struct hw_perf_event *hwc = &event->hw;
259
260         if (!armpmu)
261                 return;
262
263         /*
264          * ARM pmu always has to reprogram the period, so ignore
265          * PERF_EF_RELOAD, see the comment below.
266          */
267         if (flags & PERF_EF_RELOAD)
268                 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
269
270         hwc->state = 0;
271         /*
272          * Set the period again. Some counters can't be stopped, so when we
273          * were stopped we simply disabled the IRQ source and the counter
274          * may have been left counting. If we don't do this step then we may
275          * get an interrupt too soon or *way* too late if the overflow has
276          * happened since disabling.
277          */
278         armpmu_event_set_period(event, hwc, hwc->idx);
279         armpmu->enable(hwc, hwc->idx);
280 }
281
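/* Stop the event and release the hardware counter it was using. */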
282 static void
283 armpmu_del(struct perf_event *event, int flags)
284 {
285         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
286         struct hw_perf_event *hwc = &event->hw;
287         int idx = hwc->idx;
288
289         WARN_ON(idx < 0);
290
291         clear_bit(idx, cpuc->active_mask);
292         armpmu_stop(event, PERF_EF_UPDATE);
293         cpuc->events[idx] = NULL;
294         clear_bit(idx, cpuc->used_mask);
295
296         perf_event_update_userpage(event);
297 }
298
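/*
 * Allocate a hardware counter for the event, with the PMU disabled around
 * the allocation, and start the event if PERF_EF_START is set.
 */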
299 static int
300 armpmu_add(struct perf_event *event, int flags)
301 {
302         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
303         struct hw_perf_event *hwc = &event->hw;
304         int idx;
305         int err = 0;
306
307         perf_pmu_disable(event->pmu);
308
309         /* If we don't have space for the counter then finish early. */
310         idx = armpmu->get_event_idx(cpuc, hwc);
311         if (idx < 0) {
312                 err = idx;
313                 goto out;
314         }
315
316         /*
317          * If there is an event in the counter we are going to use then make
318          * sure it is disabled.
319          */
320         event->hw.idx = idx;
321         armpmu->disable(hwc, idx);
322         cpuc->events[idx] = event;
323         set_bit(idx, cpuc->active_mask);
324
325         hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
326         if (flags & PERF_EF_START)
327                 armpmu_start(event, PERF_EF_RELOAD);
328
329         /* Propagate our changes to the userspace mapping. */
330         perf_event_update_userpage(event);
331
332 out:
333         perf_pmu_enable(event->pmu);
334         return err;
335 }
336
337 static struct pmu pmu;
338
339 static int
340 validate_event(struct cpu_hw_events *cpuc,
341                struct perf_event *event)
342 {
343         struct hw_perf_event fake_event = event->hw;
344
345         if (event->pmu && event->pmu != &pmu)
346                 return 0;
347
348         return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
349 }
350
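/*
 * Check that a whole event group can be scheduled at once by allocating
 * counter indices for the leader and each sibling against an empty, fake
 * set of hardware events.
 */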
351 static int
352 validate_group(struct perf_event *event)
353 {
354         struct perf_event *sibling, *leader = event->group_leader;
355         struct cpu_hw_events fake_pmu;
356
357         memset(&fake_pmu, 0, sizeof(fake_pmu));
358
359         if (!validate_event(&fake_pmu, leader))
360                 return -ENOSPC;
361
362         list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
363                 if (!validate_event(&fake_pmu, sibling))
364                         return -ENOSPC;
365         }
366
367         if (!validate_event(&fake_pmu, event))
368                 return -ENOSPC;
369
370         return 0;
371 }
372
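/*
 * Claim the CPU PMU device and request all of its interrupts. On failure,
 * any IRQs already requested are freed and the device is released.
 */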
373 static int
374 armpmu_reserve_hardware(void)
375 {
376         int i, err = -ENODEV, irq;
377
378         pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
379         if (IS_ERR(pmu_device)) {
380                 pr_warning("unable to reserve pmu\n");
381                 return PTR_ERR(pmu_device);
382         }
383
384         init_pmu(ARM_PMU_DEVICE_CPU);
385
386         if (pmu_device->num_resources < 1) {
387                 pr_err("no irqs for PMUs defined\n");
388                 return -ENODEV;
389         }
390
391         for (i = 0; i < pmu_device->num_resources; ++i) {
392                 irq = platform_get_irq(pmu_device, i);
393                 if (irq < 0)
394                         continue;
395
396                 err = request_irq(irq, armpmu->handle_irq,
397                                   IRQF_DISABLED | IRQF_NOBALANCING,
398                                   "armpmu", NULL);
399                 if (err) {
400                         pr_warning("unable to request IRQ%d for ARM perf "
401                                 "counters\n", irq);
402                         break;
403                 }
404         }
405
406         if (err) {
407                 for (i = i - 1; i >= 0; --i) {
408                         irq = platform_get_irq(pmu_device, i);
409                         if (irq >= 0)
410                                 free_irq(irq, NULL);
411                 }
412                 release_pmu(pmu_device);
413                 pmu_device = NULL;
414         }
415
416         return err;
417 }
418
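/* Free the PMU interrupts, stop the hardware and release the PMU device. */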
419 static void
420 armpmu_release_hardware(void)
421 {
422         int i, irq;
423
424         for (i = pmu_device->num_resources - 1; i >= 0; --i) {
425                 irq = platform_get_irq(pmu_device, i);
426                 if (irq >= 0)
427                         free_irq(irq, NULL);
428         }
429         armpmu->stop();
430
431         release_pmu(pmu_device);
432         pmu_device = NULL;
433 }
434
435 static atomic_t active_events = ATOMIC_INIT(0);
436 static DEFINE_MUTEX(pmu_reserve_mutex);
437
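/* Release the PMU hardware when the last active event goes away. */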
438 static void
439 hw_perf_event_destroy(struct perf_event *event)
440 {
441         if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
442                 armpmu_release_hardware();
443                 mutex_unlock(&pmu_reserve_mutex);
444         }
445 }
446
447 static int
448 __hw_perf_event_init(struct perf_event *event)
449 {
450         struct hw_perf_event *hwc = &event->hw;
451         int mapping, err;
452
453         /* Decode the generic type into an ARM event identifier. */
454         if (PERF_TYPE_HARDWARE == event->attr.type) {
455                 mapping = armpmu->event_map(event->attr.config);
456         } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
457                 mapping = armpmu_map_cache_event(event->attr.config);
458         } else if (PERF_TYPE_RAW == event->attr.type) {
459                 mapping = armpmu->raw_event(event->attr.config);
460         } else {
461                 pr_debug("event type %x not supported\n", event->attr.type);
462                 return -EOPNOTSUPP;
463         }
464
465         if (mapping < 0) {
466                 pr_debug("event %x:%llx not supported\n", event->attr.type,
467                          event->attr.config);
468                 return mapping;
469         }
470
471         /*
472          * Check whether we need to exclude the counter from certain modes.
473          * The ARM performance counters are on all of the time so if someone
474          * has asked us for some excludes then we have to fail.
475          */
476         if (event->attr.exclude_kernel || event->attr.exclude_user ||
477             event->attr.exclude_hv || event->attr.exclude_idle) {
478                 pr_debug("ARM performance counters do not support "
479                          "mode exclusion\n");
480                 return -EPERM;
481         }
482
483         /*
484          * We don't assign an index until we actually place the event onto
485          * hardware. Use -1 to signify that we haven't decided where to put it
486          * yet. For SMP systems, each core has its own PMU so we can't do any
487          * clever allocation or constraints checking at this point.
488          */
489         hwc->idx = -1;
490
491         /*
492          * Store the event encoding into the config_base field. config and
493          * event_base are unused as the only 2 things we need to know are
494          * the event mapping and the counter to use. The counter to use is
495          * also the index and the config_base is the event type.
496          */
497         hwc->config_base            = (unsigned long)mapping;
498         hwc->config                 = 0;
499         hwc->event_base             = 0;
500
501         if (!hwc->sample_period) {
502                 hwc->sample_period  = armpmu->max_period;
503                 hwc->last_period    = hwc->sample_period;
504                 local64_set(&hwc->period_left, hwc->sample_period);
505         }
506
507         err = 0;
508         if (event->group_leader != event) {
509                 err = validate_group(event);
510                 if (err)
511                         return -EINVAL;
512         }
513
514         return err;
515 }
516
517 static int armpmu_event_init(struct perf_event *event)
518 {
519         int err = 0;
520
521         switch (event->attr.type) {
522         case PERF_TYPE_RAW:
523         case PERF_TYPE_HARDWARE:
524         case PERF_TYPE_HW_CACHE:
525                 break;
526
527         default:
528                 return -ENOENT;
529         }
530
531         if (!armpmu)
532                 return -ENODEV;
533
534         event->destroy = hw_perf_event_destroy;
535
536         if (!atomic_inc_not_zero(&active_events)) {
537                 if (atomic_read(&active_events) > perf_max_events) {
538                         atomic_dec(&active_events);
539                         return -ENOSPC;
540                 }
541
542                 mutex_lock(&pmu_reserve_mutex);
543                 if (atomic_read(&active_events) == 0) {
544                         err = armpmu_reserve_hardware();
545                 }
546
547                 if (!err)
548                         atomic_inc(&active_events);
549                 mutex_unlock(&pmu_reserve_mutex);
550         }
551
552         if (err)
553                 return err;
554
555         err = __hw_perf_event_init(event);
556         if (err)
557                 hw_perf_event_destroy(event);
558
559         return err;
560 }
561
562 static void armpmu_enable(struct pmu *pmu)
563 {
564         /* Enable all of the perf events on hardware. */
565         int idx;
566         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
567
568         if (!armpmu)
569                 return;
570
571         for (idx = 0; idx <= armpmu->num_events; ++idx) {
572                 struct perf_event *event = cpuc->events[idx];
573
574                 if (!event)
575                         continue;
576
577                 armpmu->enable(&event->hw, idx);
578         }
579
580         armpmu->start();
581 }
582
583 static void armpmu_disable(struct pmu *pmu)
584 {
585         if (armpmu)
586                 armpmu->stop();
587 }
588
589 static struct pmu pmu = {
590         .pmu_enable     = armpmu_enable,
591         .pmu_disable    = armpmu_disable,
592         .event_init     = armpmu_event_init,
593         .add            = armpmu_add,
594         .del            = armpmu_del,
595         .start          = armpmu_start,
596         .stop           = armpmu_stop,
597         .read           = armpmu_read,
598 };
599
600 /*
601  * ARMv6 Performance counter handling code.
602  *
603  * ARMv6 has 2 configurable performance counters and a single cycle counter.
604  * They all share a single reset bit but can be written to zero so we can use
605  * that for a reset.
606  *
607  * The counters can't be individually enabled or disabled so when we remove
608  * one event and replace it with another we could get spurious counts from the
609  * wrong event. However, we can take advantage of the fact that the
610  * performance counters can export events to the event bus, and the event bus
611  * itself can be monitored. This requires that we *don't* export the events to
612  * the event bus. The procedure for disabling a configurable counter is:
613  *      - change the counter to count the ETMEXTOUT[0] signal (0x20). This
614  *        effectively stops the counter from counting.
615  *      - disable the counter's interrupt generation (each counter has its
616  *        own interrupt enable bit).
617  * Once stopped, the counter value can be written as 0 to reset.
618  *
619  * To enable a counter:
620  *      - enable the counter's interrupt generation.
621  *      - set the new event type.
622  *
623  * Note: the dedicated cycle counter only counts cycles and can't be
624  * enabled/disabled independently of the others. When we want to disable the
625  * cycle counter, we have to just disable the interrupt reporting and start
626  * ignoring that counter. When re-enabling, we have to reset the value and
627  * enable the interrupt.
628  */
629
630 enum armv6_perf_types {
631         ARMV6_PERFCTR_ICACHE_MISS           = 0x0,
632         ARMV6_PERFCTR_IBUF_STALL            = 0x1,
633         ARMV6_PERFCTR_DDEP_STALL            = 0x2,
634         ARMV6_PERFCTR_ITLB_MISS             = 0x3,
635         ARMV6_PERFCTR_DTLB_MISS             = 0x4,
636         ARMV6_PERFCTR_BR_EXEC               = 0x5,
637         ARMV6_PERFCTR_BR_MISPREDICT         = 0x6,
638         ARMV6_PERFCTR_INSTR_EXEC            = 0x7,
639         ARMV6_PERFCTR_DCACHE_HIT            = 0x9,
640         ARMV6_PERFCTR_DCACHE_ACCESS         = 0xA,
641         ARMV6_PERFCTR_DCACHE_MISS           = 0xB,
642         ARMV6_PERFCTR_DCACHE_WBACK          = 0xC,
643         ARMV6_PERFCTR_SW_PC_CHANGE          = 0xD,
644         ARMV6_PERFCTR_MAIN_TLB_MISS         = 0xF,
645         ARMV6_PERFCTR_EXPL_D_ACCESS         = 0x10,
646         ARMV6_PERFCTR_LSU_FULL_STALL        = 0x11,
647         ARMV6_PERFCTR_WBUF_DRAINED          = 0x12,
648         ARMV6_PERFCTR_CPU_CYCLES            = 0xFF,
649         ARMV6_PERFCTR_NOP                   = 0x20,
650 };
651
652 enum armv6_counters {
653         ARMV6_CYCLE_COUNTER = 1,
654         ARMV6_COUNTER0,
655         ARMV6_COUNTER1,
656 };
657
658 /*
659  * The hardware events that we support. We do support cache operations but
660  * we have Harvard caches and no way to combine instruction and data
661  * accesses/misses in hardware.
662  */
663 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
664         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6_PERFCTR_CPU_CYCLES,
665         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6_PERFCTR_INSTR_EXEC,
666         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
667         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
668         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
669         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6_PERFCTR_BR_MISPREDICT,
670         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
671 };
672
673 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
674                                           [PERF_COUNT_HW_CACHE_OP_MAX]
675                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
676         [C(L1D)] = {
677                 /*
678                  * The performance counters don't differentiate between read
679                  * and write accesses/misses so this isn't strictly correct,
680                  * but it's the best we can do. Writes and reads get
681                  * combined.
682                  */
683                 [C(OP_READ)] = {
684                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
685                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
686                 },
687                 [C(OP_WRITE)] = {
688                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
689                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
690                 },
691                 [C(OP_PREFETCH)] = {
692                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
693                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
694                 },
695         },
696         [C(L1I)] = {
697                 [C(OP_READ)] = {
698                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
699                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
700                 },
701                 [C(OP_WRITE)] = {
702                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
703                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
704                 },
705                 [C(OP_PREFETCH)] = {
706                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
707                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
708                 },
709         },
710         [C(LL)] = {
711                 [C(OP_READ)] = {
712                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
713                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
714                 },
715                 [C(OP_WRITE)] = {
716                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
717                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
718                 },
719                 [C(OP_PREFETCH)] = {
720                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
721                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
722                 },
723         },
724         [C(DTLB)] = {
725                 /*
726                  * The ARM performance counters can count micro DTLB misses,
727                  * micro ITLB misses and main TLB misses. There isn't an event
728                  * for TLB misses, so use the micro misses here and if users
729                  * want the main TLB misses they can use a raw counter.
730                  */
731                 [C(OP_READ)] = {
732                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
733                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
734                 },
735                 [C(OP_WRITE)] = {
736                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
737                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
738                 },
739                 [C(OP_PREFETCH)] = {
740                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
741                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
742                 },
743         },
744         [C(ITLB)] = {
745                 [C(OP_READ)] = {
746                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
747                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
748                 },
749                 [C(OP_WRITE)] = {
750                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
751                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
752                 },
753                 [C(OP_PREFETCH)] = {
754                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
755                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
756                 },
757         },
758         [C(BPU)] = {
759                 [C(OP_READ)] = {
760                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
761                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
762                 },
763                 [C(OP_WRITE)] = {
764                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
765                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
766                 },
767                 [C(OP_PREFETCH)] = {
768                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
769                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
770                 },
771         },
772 };
773
774 enum armv6mpcore_perf_types {
775         ARMV6MPCORE_PERFCTR_ICACHE_MISS     = 0x0,
776         ARMV6MPCORE_PERFCTR_IBUF_STALL      = 0x1,
777         ARMV6MPCORE_PERFCTR_DDEP_STALL      = 0x2,
778         ARMV6MPCORE_PERFCTR_ITLB_MISS       = 0x3,
779         ARMV6MPCORE_PERFCTR_DTLB_MISS       = 0x4,
780         ARMV6MPCORE_PERFCTR_BR_EXEC         = 0x5,
781         ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
782         ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
783         ARMV6MPCORE_PERFCTR_INSTR_EXEC      = 0x8,
784         ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
785         ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
786         ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
787         ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
788         ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
789         ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
790         ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
791         ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
792         ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
793         ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
794         ARMV6MPCORE_PERFCTR_CPU_CYCLES      = 0xFF,
795 };
796
797 /*
798  * The hardware events that we support. We do support cache operations but
799  * we have Harvard caches and no way to combine instruction and data
800  * accesses/misses in hardware.
801  */
802 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
803         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
804         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
805         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
806         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
807         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
808         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
809         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
810 };
811
812 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
813                                         [PERF_COUNT_HW_CACHE_OP_MAX]
814                                         [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
815         [C(L1D)] = {
816                 [C(OP_READ)] = {
817                         [C(RESULT_ACCESS)]  =
818                                 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
819                         [C(RESULT_MISS)]    =
820                                 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
821                 },
822                 [C(OP_WRITE)] = {
823                         [C(RESULT_ACCESS)]  =
824                                 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
825                         [C(RESULT_MISS)]    =
826                                 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
827                 },
828                 [C(OP_PREFETCH)] = {
829                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
830                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
831                 },
832         },
833         [C(L1I)] = {
834                 [C(OP_READ)] = {
835                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
836                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
837                 },
838                 [C(OP_WRITE)] = {
839                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
840                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
841                 },
842                 [C(OP_PREFETCH)] = {
843                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
844                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
845                 },
846         },
847         [C(LL)] = {
848                 [C(OP_READ)] = {
849                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
850                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
851                 },
852                 [C(OP_WRITE)] = {
853                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
854                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
855                 },
856                 [C(OP_PREFETCH)] = {
857                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
858                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
859                 },
860         },
861         [C(DTLB)] = {
862                 /*
863                  * The ARM performance counters can count micro DTLB misses,
864                  * micro ITLB misses and main TLB misses. There isn't an event
865                  * for TLB misses, so use the micro misses here and if users
866                  * want the main TLB misses they can use a raw counter.
867                  */
868                 [C(OP_READ)] = {
869                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
870                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
871                 },
872                 [C(OP_WRITE)] = {
873                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
874                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
875                 },
876                 [C(OP_PREFETCH)] = {
877                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
878                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
879                 },
880         },
881         [C(ITLB)] = {
882                 [C(OP_READ)] = {
883                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
884                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
885                 },
886                 [C(OP_WRITE)] = {
887                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
888                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
889                 },
890                 [C(OP_PREFETCH)] = {
891                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
892                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
893                 },
894         },
895         [C(BPU)] = {
896                 [C(OP_READ)] = {
897                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
898                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
899                 },
900                 [C(OP_WRITE)] = {
901                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
902                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
903                 },
904                 [C(OP_PREFETCH)] = {
905                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
906                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
907                 },
908         },
909 };
910
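/*
 * The ARMv6 performance monitor control register (PMCR) is accessed via
 * CP15 c15, c12, 0 and holds the enable, reset, interrupt-enable, overflow
 * and event-select fields defined below.
 */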
911 static inline unsigned long
912 armv6_pmcr_read(void)
913 {
914         u32 val;
915         asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
916         return val;
917 }
918
919 static inline void
920 armv6_pmcr_write(unsigned long val)
921 {
922         asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
923 }
924
925 #define ARMV6_PMCR_ENABLE               (1 << 0)
926 #define ARMV6_PMCR_CTR01_RESET          (1 << 1)
927 #define ARMV6_PMCR_CCOUNT_RESET         (1 << 2)
928 #define ARMV6_PMCR_CCOUNT_DIV           (1 << 3)
929 #define ARMV6_PMCR_COUNT0_IEN           (1 << 4)
930 #define ARMV6_PMCR_COUNT1_IEN           (1 << 5)
931 #define ARMV6_PMCR_CCOUNT_IEN           (1 << 6)
932 #define ARMV6_PMCR_COUNT0_OVERFLOW      (1 << 8)
933 #define ARMV6_PMCR_COUNT1_OVERFLOW      (1 << 9)
934 #define ARMV6_PMCR_CCOUNT_OVERFLOW      (1 << 10)
935 #define ARMV6_PMCR_EVT_COUNT0_SHIFT     20
936 #define ARMV6_PMCR_EVT_COUNT0_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
937 #define ARMV6_PMCR_EVT_COUNT1_SHIFT     12
938 #define ARMV6_PMCR_EVT_COUNT1_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
939
940 #define ARMV6_PMCR_OVERFLOWED_MASK \
941         (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
942          ARMV6_PMCR_CCOUNT_OVERFLOW)
943
944 static inline int
945 armv6_pmcr_has_overflowed(unsigned long pmcr)
946 {
947         return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
948 }
949
950 static inline int
951 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
952                                   enum armv6_counters counter)
953 {
954         int ret = 0;
955
956         if (ARMV6_CYCLE_COUNTER == counter)
957                 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
958         else if (ARMV6_COUNTER0 == counter)
959                 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
960         else if (ARMV6_COUNTER1 == counter)
961                 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
962         else
963                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
964
965         return ret;
966 }
967
968 static inline u32
969 armv6pmu_read_counter(int counter)
970 {
971         unsigned long value = 0;
972
973         if (ARMV6_CYCLE_COUNTER == counter)
974                 asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
975         else if (ARMV6_COUNTER0 == counter)
976                 asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
977         else if (ARMV6_COUNTER1 == counter)
978                 asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
979         else
980                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
981
982         return value;
983 }
984
985 static inline void
986 armv6pmu_write_counter(int counter,
987                        u32 value)
988 {
989         if (ARMV6_CYCLE_COUNTER == counter)
990                 asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
991         else if (ARMV6_COUNTER0 == counter)
992                 asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
993         else if (ARMV6_COUNTER1 == counter)
994                 asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
995         else
996                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
997 }
998
999 void
1000 armv6pmu_enable_event(struct hw_perf_event *hwc,
1001                       int idx)
1002 {
1003         unsigned long val, mask, evt, flags;
1004
1005         if (ARMV6_CYCLE_COUNTER == idx) {
1006                 mask    = 0;
1007                 evt     = ARMV6_PMCR_CCOUNT_IEN;
1008         } else if (ARMV6_COUNTER0 == idx) {
1009                 mask    = ARMV6_PMCR_EVT_COUNT0_MASK;
1010                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
1011                           ARMV6_PMCR_COUNT0_IEN;
1012         } else if (ARMV6_COUNTER1 == idx) {
1013                 mask    = ARMV6_PMCR_EVT_COUNT1_MASK;
1014                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
1015                           ARMV6_PMCR_COUNT1_IEN;
1016         } else {
1017                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1018                 return;
1019         }
1020
1021         /*
1022          * Mask out the current event and set the counter to count the event
1023          * that we're interested in.
1024          */
1025         spin_lock_irqsave(&pmu_lock, flags);
1026         val = armv6_pmcr_read();
1027         val &= ~mask;
1028         val |= evt;
1029         armv6_pmcr_write(val);
1030         spin_unlock_irqrestore(&pmu_lock, flags);
1031 }
1032
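/*
 * All ARMv6 counters share one overflow interrupt: clear the overflow
 * flags, then update and reprogram each active counter that overflowed.
 */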
1033 static irqreturn_t
1034 armv6pmu_handle_irq(int irq_num,
1035                     void *dev)
1036 {
1037         unsigned long pmcr = armv6_pmcr_read();
1038         struct perf_sample_data data;
1039         struct cpu_hw_events *cpuc;
1040         struct pt_regs *regs;
1041         int idx;
1042
1043         if (!armv6_pmcr_has_overflowed(pmcr))
1044                 return IRQ_NONE;
1045
1046         regs = get_irq_regs();
1047
1048         /*
1049          * The interrupts are cleared by writing the overflow flags back to
1050          * the control register. All of the other bits don't have any effect
1051          * if they are rewritten, so write the whole value back.
1052          */
1053         armv6_pmcr_write(pmcr);
1054
1055         perf_sample_data_init(&data, 0);
1056
1057         cpuc = &__get_cpu_var(cpu_hw_events);
1058         for (idx = 0; idx <= armpmu->num_events; ++idx) {
1059                 struct perf_event *event = cpuc->events[idx];
1060                 struct hw_perf_event *hwc;
1061
1062                 if (!test_bit(idx, cpuc->active_mask))
1063                         continue;
1064
1065                 /*
1066                  * We have a single interrupt for all counters. Check that
1067                  * each counter has overflowed before we process it.
1068                  */
1069                 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1070                         continue;
1071
1072                 hwc = &event->hw;
1073                 armpmu_event_update(event, hwc, idx);
1074                 data.period = event->hw.last_period;
1075                 if (!armpmu_event_set_period(event, hwc, idx))
1076                         continue;
1077
1078                 if (perf_event_overflow(event, 0, &data, regs))
1079                         armpmu->disable(hwc, idx);
1080         }
1081
1082         /*
1083          * Handle the pending perf events.
1084          *
1085          * Note: this call *must* be run with interrupts enabled. For
1086          * platforms that can have the PMU interrupts raised as a PMI, this
1087          * will not work.
1088          */
1089         perf_event_do_pending();
1090
1091         return IRQ_HANDLED;
1092 }
1093
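/* Start the counters by setting the global enable bit in the PMCR. */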
1094 static void
1095 armv6pmu_start(void)
1096 {
1097         unsigned long flags, val;
1098
1099         spin_lock_irqsave(&pmu_lock, flags);
1100         val = armv6_pmcr_read();
1101         val |= ARMV6_PMCR_ENABLE;
1102         armv6_pmcr_write(val);
1103         spin_unlock_irqrestore(&pmu_lock, flags);
1104 }
1105
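/* Stop the counters by clearing the global enable bit in the PMCR. */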
1106 void
1107 armv6pmu_stop(void)
1108 {
1109         unsigned long flags, val;
1110
1111         spin_lock_irqsave(&pmu_lock, flags);
1112         val = armv6_pmcr_read();
1113         val &= ~ARMV6_PMCR_ENABLE;
1114         armv6_pmcr_write(val);
1115         spin_unlock_irqrestore(&pmu_lock, flags);
1116 }
1117
1118 static inline int
1119 armv6pmu_event_map(int config)
1120 {
1121         int mapping = armv6_perf_map[config];
1122         if (HW_OP_UNSUPPORTED == mapping)
1123                 mapping = -EOPNOTSUPP;
1124         return mapping;
1125 }
1126
1127 static inline int
1128 armv6mpcore_pmu_event_map(int config)
1129 {
1130         int mapping = armv6mpcore_perf_map[config];
1131         if (HW_OP_UNSUPPORTED == mapping)
1132                 mapping = -EOPNOTSUPP;
1133         return mapping;
1134 }
1135
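/* Raw ARMv6 events use only the low 8 bits of the config value. */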
1136 static u64
1137 armv6pmu_raw_event(u64 config)
1138 {
1139         return config & 0xff;
1140 }
1141
1142 static int
1143 armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1144                        struct hw_perf_event *event)
1145 {
1146         /* Always place a cycle counter into the cycle counter. */
1147         if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1148                 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1149                         return -EAGAIN;
1150
1151                 return ARMV6_CYCLE_COUNTER;
1152         } else {
1153                 /*
1154                  * For anything other than a cycle counter, try and use
1155                  * counter0 and counter1.
1156                  */
1157                 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1158                         return ARMV6_COUNTER1;
1159                 }
1160
1161                 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1162                         return ARMV6_COUNTER0;
1163                 }
1164
1165                 /* The counters are all in use. */
1166                 return -EAGAIN;
1167         }
1168 }
1169
1170 static void
1171 armv6pmu_disable_event(struct hw_perf_event *hwc,
1172                        int idx)
1173 {
1174         unsigned long val, mask, evt, flags;
1175
1176         if (ARMV6_CYCLE_COUNTER == idx) {
1177                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1178                 evt     = 0;
1179         } else if (ARMV6_COUNTER0 == idx) {
1180                 mask    = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1181                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1182         } else if (ARMV6_COUNTER1 == idx) {
1183                 mask    = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1184                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1185         } else {
1186                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1187                 return;
1188         }
1189
1190         /*
1191          * Mask out the current event and set the counter to count the number
1192          * of ETM bus signal assertion cycles. The external reporting should
1193          * be disabled and so this should never increment.
1194          */
1195         spin_lock_irqsave(&pmu_lock, flags);
1196         val = armv6_pmcr_read();
1197         val &= ~mask;
1198         val |= evt;
1199         armv6_pmcr_write(val);
1200         spin_unlock_irqrestore(&pmu_lock, flags);
1201 }
1202
1203 static void
1204 armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1205                               int idx)
1206 {
1207         unsigned long val, mask, flags, evt = 0;
1208
1209         if (ARMV6_CYCLE_COUNTER == idx) {
1210                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1211         } else if (ARMV6_COUNTER0 == idx) {
1212                 mask    = ARMV6_PMCR_COUNT0_IEN;
1213         } else if (ARMV6_COUNTER1 == idx) {
1214                 mask    = ARMV6_PMCR_COUNT1_IEN;
1215         } else {
1216                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1217                 return;
1218         }
1219
1220         /*
1221          * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1222          * simply disable the interrupt reporting.
1223          */
1224         spin_lock_irqsave(&pmu_lock, flags);
1225         val = armv6_pmcr_read();
1226         val &= ~mask;
1227         val |= evt;
1228         armv6_pmcr_write(val);
1229         spin_unlock_irqrestore(&pmu_lock, flags);
1230 }
1231
1232 static const struct arm_pmu armv6pmu = {
1233         .id                     = ARM_PERF_PMU_ID_V6,
1234         .handle_irq             = armv6pmu_handle_irq,
1235         .enable                 = armv6pmu_enable_event,
1236         .disable                = armv6pmu_disable_event,
1237         .event_map              = armv6pmu_event_map,
1238         .raw_event              = armv6pmu_raw_event,
1239         .read_counter           = armv6pmu_read_counter,
1240         .write_counter          = armv6pmu_write_counter,
1241         .get_event_idx          = armv6pmu_get_event_idx,
1242         .start                  = armv6pmu_start,
1243         .stop                   = armv6pmu_stop,
1244         .num_events             = 3,
1245         .max_period             = (1LLU << 32) - 1,
1246 };
1247
1248 /*
1249  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1250  * that some of the events have different enumerations and that there is no
1251  * *hack* to stop the programmable counters. To stop the counters we simply
1252  * disable the interrupt reporting and update the event. When unthrottling we
1253  * reset the period and enable the interrupt reporting.
1254  */
1255 static const struct arm_pmu armv6mpcore_pmu = {
1256         .id                     = ARM_PERF_PMU_ID_V6MP,
1257         .handle_irq             = armv6pmu_handle_irq,
1258         .enable                 = armv6pmu_enable_event,
1259         .disable                = armv6mpcore_pmu_disable_event,
1260         .event_map              = armv6mpcore_pmu_event_map,
1261         .raw_event              = armv6pmu_raw_event,
1262         .read_counter           = armv6pmu_read_counter,
1263         .write_counter          = armv6pmu_write_counter,
1264         .get_event_idx          = armv6pmu_get_event_idx,
1265         .start                  = armv6pmu_start,
1266         .stop                   = armv6pmu_stop,
1267         .num_events             = 3,
1268         .max_period             = (1LLU << 32) - 1,
1269 };
1270
1271 /*
1272  * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
1273  *
1274  * Copied from ARMv6 code, with the low level code inspired
1275  *  by the ARMv7 OProfile code.
1276  *
1277  * Cortex-A8 has up to 4 configurable performance counters and
1278  *  a single cycle counter.
1279  * Cortex-A9 has up to 31 configurable performance counters and
1280  *  a single cycle counter.
1281  *
1282  * All counters can be enabled/disabled and IRQ masked separately. The cycle
1283  *  counter and all 4 performance counters together can be reset separately.
1284  */
1285
1286 /* Common ARMv7 event types */
1287 enum armv7_perf_types {
1288         ARMV7_PERFCTR_PMNC_SW_INCR              = 0x00,
1289         ARMV7_PERFCTR_IFETCH_MISS               = 0x01,
1290         ARMV7_PERFCTR_ITLB_MISS                 = 0x02,
1291         ARMV7_PERFCTR_DCACHE_REFILL             = 0x03,
1292         ARMV7_PERFCTR_DCACHE_ACCESS             = 0x04,
1293         ARMV7_PERFCTR_DTLB_REFILL               = 0x05,
1294         ARMV7_PERFCTR_DREAD                     = 0x06,
1295         ARMV7_PERFCTR_DWRITE                    = 0x07,
1296
1297         ARMV7_PERFCTR_EXC_TAKEN                 = 0x09,
1298         ARMV7_PERFCTR_EXC_EXECUTED              = 0x0A,
1299         ARMV7_PERFCTR_CID_WRITE                 = 0x0B,
1300         /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
1301          * It counts:
1302          *  - all branch instructions,
1303          *  - instructions that explicitly write the PC,
1304          *  - exception generating instructions.
1305          */
1306         ARMV7_PERFCTR_PC_WRITE                  = 0x0C,
1307         ARMV7_PERFCTR_PC_IMM_BRANCH             = 0x0D,
1308         ARMV7_PERFCTR_UNALIGNED_ACCESS          = 0x0F,
1309         ARMV7_PERFCTR_PC_BRANCH_MIS_PRED        = 0x10,
1310         ARMV7_PERFCTR_CLOCK_CYCLES              = 0x11,
1311
1312         ARMV7_PERFCTR_PC_BRANCH_MIS_USED        = 0x12,
1313
1314         ARMV7_PERFCTR_CPU_CYCLES                = 0xFF
1315 };
1316
1317 /* ARMv7 Cortex-A8 specific event types */
1318 enum armv7_a8_perf_types {
1319         ARMV7_PERFCTR_INSTR_EXECUTED            = 0x08,
1320
1321         ARMV7_PERFCTR_PC_PROC_RETURN            = 0x0E,
1322
1323         ARMV7_PERFCTR_WRITE_BUFFER_FULL         = 0x40,
1324         ARMV7_PERFCTR_L2_STORE_MERGED           = 0x41,
1325         ARMV7_PERFCTR_L2_STORE_BUFF             = 0x42,
1326         ARMV7_PERFCTR_L2_ACCESS                 = 0x43,
1327         ARMV7_PERFCTR_L2_CACH_MISS              = 0x44,
1328         ARMV7_PERFCTR_AXI_READ_CYCLES           = 0x45,
1329         ARMV7_PERFCTR_AXI_WRITE_CYCLES          = 0x46,
1330         ARMV7_PERFCTR_MEMORY_REPLAY             = 0x47,
1331         ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY   = 0x48,
1332         ARMV7_PERFCTR_L1_DATA_MISS              = 0x49,
1333         ARMV7_PERFCTR_L1_INST_MISS              = 0x4A,
1334         ARMV7_PERFCTR_L1_DATA_COLORING          = 0x4B,
1335         ARMV7_PERFCTR_L1_NEON_DATA              = 0x4C,
1336         ARMV7_PERFCTR_L1_NEON_CACH_DATA         = 0x4D,
1337         ARMV7_PERFCTR_L2_NEON                   = 0x4E,
1338         ARMV7_PERFCTR_L2_NEON_HIT               = 0x4F,
1339         ARMV7_PERFCTR_L1_INST                   = 0x50,
1340         ARMV7_PERFCTR_PC_RETURN_MIS_PRED        = 0x51,
1341         ARMV7_PERFCTR_PC_BRANCH_FAILED          = 0x52,
1342         ARMV7_PERFCTR_PC_BRANCH_TAKEN           = 0x53,
1343         ARMV7_PERFCTR_PC_BRANCH_EXECUTED        = 0x54,
1344         ARMV7_PERFCTR_OP_EXECUTED               = 0x55,
1345         ARMV7_PERFCTR_CYCLES_INST_STALL         = 0x56,
1346         ARMV7_PERFCTR_CYCLES_INST               = 0x57,
1347         ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL    = 0x58,
1348         ARMV7_PERFCTR_CYCLES_NEON_INST_STALL    = 0x59,
1349         ARMV7_PERFCTR_NEON_CYCLES               = 0x5A,
1350
1351         ARMV7_PERFCTR_PMU0_EVENTS               = 0x70,
1352         ARMV7_PERFCTR_PMU1_EVENTS               = 0x71,
1353         ARMV7_PERFCTR_PMU_EVENTS                = 0x72,
1354 };
1355
1356 /* ARMv7 Cortex-A9 specific event types */
1357 enum armv7_a9_perf_types {
1358         ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC     = 0x40,
1359         ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC     = 0x41,
1360         ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC       = 0x42,
1361
1362         ARMV7_PERFCTR_COHERENT_LINE_MISS        = 0x50,
1363         ARMV7_PERFCTR_COHERENT_LINE_HIT         = 0x51,
1364
1365         ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES   = 0x60,
1366         ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES   = 0x61,
1367         ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
1368         ARMV7_PERFCTR_STREX_EXECUTED_PASSED     = 0x63,
1369         ARMV7_PERFCTR_STREX_EXECUTED_FAILED     = 0x64,
1370         ARMV7_PERFCTR_DATA_EVICTION             = 0x65,
1371         ARMV7_PERFCTR_ISSUE_STAGE_NO_INST       = 0x66,
1372         ARMV7_PERFCTR_ISSUE_STAGE_EMPTY         = 0x67,
1373         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE  = 0x68,
1374
1375         ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
1376
1377         ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST   = 0x70,
1378         ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
1379         ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST  = 0x72,
1380         ARMV7_PERFCTR_FP_EXECUTED_INST          = 0x73,
1381         ARMV7_PERFCTR_NEON_EXECUTED_INST        = 0x74,
1382
1383         ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
1384         ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES  = 0x81,
1385         ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES        = 0x82,
1386         ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES        = 0x83,
1387         ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES  = 0x84,
1388         ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES  = 0x85,
1389         ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES      = 0x86,
1390
1391         ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES  = 0x8A,
1392         ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
1393
1394         ARMV7_PERFCTR_ISB_INST                  = 0x90,
1395         ARMV7_PERFCTR_DSB_INST                  = 0x91,
1396         ARMV7_PERFCTR_DMB_INST                  = 0x92,
1397         ARMV7_PERFCTR_EXT_INTERRUPTS            = 0x93,
1398
1399         ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED     = 0xA0,
1400         ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED       = 0xA1,
1401         ARMV7_PERFCTR_PLE_FIFO_FLUSH            = 0xA2,
1402         ARMV7_PERFCTR_PLE_RQST_COMPLETED        = 0xA3,
1403         ARMV7_PERFCTR_PLE_FIFO_OVERFLOW         = 0xA4,
1404         ARMV7_PERFCTR_PLE_RQST_PROG             = 0xA5
1405 };
1406
1407 /*
1408  * Cortex-A8 HW events mapping
1409  *
1410  * The hardware events that we support. We do support cache operations but
1411  * we have Harvard caches and no way to combine instruction and data
1412  * accesses/misses in hardware.
1413  */
1414 static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1415         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1416         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV7_PERFCTR_INSTR_EXECUTED,
1417         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
1418         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
1419         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1420         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1421         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1422 };
1423
1424 static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1425                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1426                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1427         [C(L1D)] = {
1428                 /*
1429                  * The performance counters don't differentiate between read
1430                  * and write accesses/misses so this isn't strictly correct,
1431                  * but it's the best we can do. Writes and reads get
1432                  * combined.
1433                  */
1434                 [C(OP_READ)] = {
1435                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1436                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1437                 },
1438                 [C(OP_WRITE)] = {
1439                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1440                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1441                 },
1442                 [C(OP_PREFETCH)] = {
1443                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1444                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1445                 },
1446         },
1447         [C(L1I)] = {
1448                 [C(OP_READ)] = {
1449                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1450                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1451                 },
1452                 [C(OP_WRITE)] = {
1453                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1454                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1455                 },
1456                 [C(OP_PREFETCH)] = {
1457                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1458                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1459                 },
1460         },
1461         [C(LL)] = {
1462                 [C(OP_READ)] = {
1463                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1464                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1465                 },
1466                 [C(OP_WRITE)] = {
1467                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1468                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1469                 },
1470                 [C(OP_PREFETCH)] = {
1471                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1472                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1473                 },
1474         },
1475         [C(DTLB)] = {
1476                 /*
1477                  * Only ITLB misses and DTLB refills are supported.
1478                  * If users want DTLB accesses as well, a raw
1479                  * counter must be used.
1480                  */
1481                 [C(OP_READ)] = {
1482                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1483                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1484                 },
1485                 [C(OP_WRITE)] = {
1486                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1487                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1488                 },
1489                 [C(OP_PREFETCH)] = {
1490                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1491                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1492                 },
1493         },
1494         [C(ITLB)] = {
1495                 [C(OP_READ)] = {
1496                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1497                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1498                 },
1499                 [C(OP_WRITE)] = {
1500                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1501                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1502                 },
1503                 [C(OP_PREFETCH)] = {
1504                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1505                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1506                 },
1507         },
1508         [C(BPU)] = {
1509                 [C(OP_READ)] = {
1510                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1511                         [C(RESULT_MISS)]
1512                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1513                 },
1514                 [C(OP_WRITE)] = {
1515                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1516                         [C(RESULT_MISS)]
1517                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1518                 },
1519                 [C(OP_PREFETCH)] = {
1520                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1521                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1522                 },
1523         },
1524 };
1525
1526 /*
1527  * Cortex-A9 HW events mapping
1528  */
1529 static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1530         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1531         [PERF_COUNT_HW_INSTRUCTIONS]        =
1532                                         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1533         [PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1534         [PERF_COUNT_HW_CACHE_MISSES]        = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1535         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1536         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1537         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1538 };
1539
1540 static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1541                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1542                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1543         [C(L1D)] = {
1544                 /*
1545                  * The performance counters don't differentiate between read
1546                  * and write accesses/misses so this isn't strictly correct,
1547                  * but it's the best we can do. Writes and reads get
1548                  * combined.
1549                  */
1550                 [C(OP_READ)] = {
1551                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1552                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1553                 },
1554                 [C(OP_WRITE)] = {
1555                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1556                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1557                 },
1558                 [C(OP_PREFETCH)] = {
1559                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1560                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1561                 },
1562         },
1563         [C(L1I)] = {
1564                 [C(OP_READ)] = {
1565                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1566                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1567                 },
1568                 [C(OP_WRITE)] = {
1569                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1570                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1571                 },
1572                 [C(OP_PREFETCH)] = {
1573                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1574                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1575                 },
1576         },
1577         [C(LL)] = {
1578                 [C(OP_READ)] = {
1579                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1580                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1581                 },
1582                 [C(OP_WRITE)] = {
1583                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1584                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1585                 },
1586                 [C(OP_PREFETCH)] = {
1587                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1588                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1589                 },
1590         },
1591         [C(DTLB)] = {
1592                 /*
1593                  * Only ITLB misses and DTLB refills are supported.
1594                  * If users want DTLB accesses as well, a raw
1595                  * counter must be used.
1596                  */
1597                 [C(OP_READ)] = {
1598                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1599                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1600                 },
1601                 [C(OP_WRITE)] = {
1602                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1603                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1604                 },
1605                 [C(OP_PREFETCH)] = {
1606                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1607                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1608                 },
1609         },
1610         [C(ITLB)] = {
1611                 [C(OP_READ)] = {
1612                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1613                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1614                 },
1615                 [C(OP_WRITE)] = {
1616                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1617                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1618                 },
1619                 [C(OP_PREFETCH)] = {
1620                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1621                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1622                 },
1623         },
1624         [C(BPU)] = {
1625                 [C(OP_READ)] = {
1626                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1627                         [C(RESULT_MISS)]
1628                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1629                 },
1630                 [C(OP_WRITE)] = {
1631                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1632                         [C(RESULT_MISS)]
1633                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1634                 },
1635                 [C(OP_PREFETCH)] = {
1636                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1637                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1638                 },
1639         },
1640 };
1641
1642 /*
1643  * Perf Events counters
1644  */
1645 enum armv7_counters {
1646         ARMV7_CYCLE_COUNTER             = 1,    /* Cycle counter */
1647         ARMV7_COUNTER0                  = 2,    /* First event counter */
1648 };
1649
1650 /*
1651  * The cycle counter is ARMV7_CYCLE_COUNTER.
1652  * The first event counter is ARMV7_COUNTER0.
1653  * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
1654  */
1655 #define ARMV7_COUNTER_LAST      (ARMV7_COUNTER0 + armpmu->num_events - 1)
1656
1657 /*
1658  * ARMv7 low level PMNC access
1659  */
1660
1661 /*
1662  * Per-CPU PMNC: config reg
1663  */
1664 #define ARMV7_PMNC_E            (1 << 0) /* Enable all counters */
1665 #define ARMV7_PMNC_P            (1 << 1) /* Reset all counters */
1666 #define ARMV7_PMNC_C            (1 << 2) /* Cycle counter reset */
1667 #define ARMV7_PMNC_D            (1 << 3) /* CCNT counts every 64th cpu cycle */
1668 #define ARMV7_PMNC_X            (1 << 4) /* Export to ETM */
1669 #define ARMV7_PMNC_DP           (1 << 5) /* Disable CCNT if non-invasive debug*/
1670 #define ARMV7_PMNC_N_SHIFT      11       /* Number of counters supported */
1671 #define ARMV7_PMNC_N_MASK       0x1f
1672 #define ARMV7_PMNC_MASK         0x3f     /* Mask for writable bits */
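     /*
      * The PMNC.N field (bits [15:11] above) reports how many event counters
      * the CPU implements; armv7_reset_read_pmnc() below reads it once at
      * init time and the result (plus the cycle counter) becomes
      * armv7pmu.num_events.
      */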
1673
1674 /*
1675  * Available counters
1676  */
1677 #define ARMV7_CNT0              0       /* First event counter */
1678 #define ARMV7_CCNT              31      /* Cycle counter */
1679
1680 /* Perf Event to low level counters mapping */
1681 #define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
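     /*
      * In other words, perf event index ARMV7_COUNTER0 + n drives hardware
      * counter CNTn, so the CNTENS/CNTENC/INTENS/INTENC/FLAG macros below
      * subtract ARMV7_EVENT_CNT_TO_CNTx to turn an event index into a
      * hardware counter bit position.
      */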
1682
1683 /*
1684  * CNTENS: counters enable reg
1685  */
1686 #define ARMV7_CNTENS_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1687 #define ARMV7_CNTENS_C          (1 << ARMV7_CCNT)
1688
1689 /*
1690  * CNTENC: counters disable reg
1691  */
1692 #define ARMV7_CNTENC_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1693 #define ARMV7_CNTENC_C          (1 << ARMV7_CCNT)
1694
1695 /*
1696  * INTENS: counters overflow interrupt enable reg
1697  */
1698 #define ARMV7_INTENS_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1699 #define ARMV7_INTENS_C          (1 << ARMV7_CCNT)
1700
1701 /*
1702  * INTENC: counters overflow interrupt disable reg
1703  */
1704 #define ARMV7_INTENC_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1705 #define ARMV7_INTENC_C          (1 << ARMV7_CCNT)
1706
1707 /*
1708  * EVTSEL: Event selection reg
1709  */
1710 #define ARMV7_EVTSEL_MASK       0xff            /* Mask for writable bits */
1711
1712 /*
1713  * SELECT: Counter selection reg
1714  */
1715 #define ARMV7_SELECT_MASK       0x1f            /* Mask for writable bits */
1716
1717 /*
1718  * FLAG: counters overflow flag status reg
1719  */
1720 #define ARMV7_FLAG_P(idx)       (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1721 #define ARMV7_FLAG_C            (1 << ARMV7_CCNT)
1722 #define ARMV7_FLAG_MASK         0xffffffff      /* Mask for writable bits */
1723 #define ARMV7_OVERFLOWED_MASK   ARMV7_FLAG_MASK
1724
1725 static inline unsigned long armv7_pmnc_read(void)
1726 {
1727         u32 val;
1728         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1729         return val;
1730 }
1731
1732 static inline void armv7_pmnc_write(unsigned long val)
1733 {
1734         val &= ARMV7_PMNC_MASK;
1735         asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1736 }
1737
1738 static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1739 {
1740         return pmnc & ARMV7_OVERFLOWED_MASK;
1741 }
1742
1743 static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1744                                         enum armv7_counters counter)
1745 {
1746         int ret = 0;
1747
1748         if (counter == ARMV7_CYCLE_COUNTER)
1749                 ret = pmnc & ARMV7_FLAG_C;
1750         else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1751                 ret = pmnc & ARMV7_FLAG_P(counter);
1752         else
1753                 pr_err("CPU%u checking wrong counter %d overflow status\n",
1754                         smp_processor_id(), counter);
1755
1756         return ret;
1757 }
1758
1759 static inline int armv7_pmnc_select_counter(unsigned int idx)
1760 {
1761         u32 val;
1762
1763         if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1764                 pr_err("CPU%u selecting wrong PMNC counter"
1765                         " %d\n", smp_processor_id(), idx);
1766                 return -1;
1767         }
1768
1769         val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1770         asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1771
1772         return idx;
1773 }
1774
1775 static inline u32 armv7pmu_read_counter(int idx)
1776 {
1777         unsigned long value = 0;
1778
1779         if (idx == ARMV7_CYCLE_COUNTER)
1780                 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1781         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1782                 if (armv7_pmnc_select_counter(idx) == idx)
1783                         asm volatile("mrc p15, 0, %0, c9, c13, 2"
1784                                      : "=r" (value));
1785         } else
1786                 pr_err("CPU%u reading wrong counter %d\n",
1787                         smp_processor_id(), idx);
1788
1789         return value;
1790 }
1791
1792 static inline void armv7pmu_write_counter(int idx, u32 value)
1793 {
1794         if (idx == ARMV7_CYCLE_COUNTER)
1795                 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1796         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1797                 if (armv7_pmnc_select_counter(idx) == idx)
1798                         asm volatile("mcr p15, 0, %0, c9, c13, 2"
1799                                      : : "r" (value));
1800         } else
1801                 pr_err("CPU%u writing wrong counter %d\n",
1802                         smp_processor_id(), idx);
1803 }
1804
1805 static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1806 {
1807         if (armv7_pmnc_select_counter(idx) == idx) {
1808                 val &= ARMV7_EVTSEL_MASK;
1809                 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1810         }
1811 }
1812
1813 static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1814 {
1815         u32 val;
1816
1817         if ((idx != ARMV7_CYCLE_COUNTER) &&
1818             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1819                 pr_err("CPU%u enabling wrong PMNC counter"
1820                         " %d\n", smp_processor_id(), idx);
1821                 return -1;
1822         }
1823
1824         if (idx == ARMV7_CYCLE_COUNTER)
1825                 val = ARMV7_CNTENS_C;
1826         else
1827                 val = ARMV7_CNTENS_P(idx);
1828
1829         asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1830
1831         return idx;
1832 }
1833
1834 static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1835 {
1836         u32 val;
1837
1838
1839         if ((idx != ARMV7_CYCLE_COUNTER) &&
1840             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1841                 pr_err("CPU%u disabling wrong PMNC counter"
1842                         " %d\n", smp_processor_id(), idx);
1843                 return -1;
1844         }
1845
1846         if (idx == ARMV7_CYCLE_COUNTER)
1847                 val = ARMV7_CNTENC_C;
1848         else
1849                 val = ARMV7_CNTENC_P(idx);
1850
1851         asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1852
1853         return idx;
1854 }
1855
1856 static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1857 {
1858         u32 val;
1859
1860         if ((idx != ARMV7_CYCLE_COUNTER) &&
1861             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1862                 pr_err("CPU%u enabling interrupt for wrong PMNC"
1863                         " counter %d\n", smp_processor_id(), idx);
1864                 return -1;
1865         }
1866
1867         if (idx == ARMV7_CYCLE_COUNTER)
1868                 val = ARMV7_INTENS_C;
1869         else
1870                 val = ARMV7_INTENS_P(idx);
1871
1872         asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1873
1874         return idx;
1875 }
1876
1877 static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1878 {
1879         u32 val;
1880
1881         if ((idx != ARMV7_CYCLE_COUNTER) &&
1882             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1883                 pr_err("CPU%u disabling interrupt for wrong PMNC"
1884                         " counter %d\n", smp_processor_id(), idx);
1885                 return -1;
1886         }
1887
1888         if (idx == ARMV7_CYCLE_COUNTER)
1889                 val = ARMV7_INTENC_C;
1890         else
1891                 val = ARMV7_INTENC_P(idx);
1892
1893         asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1894
1895         return idx;
1896 }
1897
1898 static inline u32 armv7_pmnc_getreset_flags(void)
1899 {
1900         u32 val;
1901
1902         /* Read */
1903         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1904
1905         /* Write to clear flags */
1906         val &= ARMV7_FLAG_MASK;
1907         asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1908
1909         return val;
1910 }
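     /*
      * The overflow flag register is write-one-to-clear, so writing back the
      * value just read clears exactly the flags that were set; the IRQ
      * handler then inspects the returned snapshot with
      * armv7_pmnc_counter_has_overflowed().
      */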
1911
1912 #ifdef DEBUG
1913 static void armv7_pmnc_dump_regs(void)
1914 {
1915         u32 val;
1916         unsigned int cnt;
1917
1918         printk(KERN_INFO "PMNC registers dump:\n");
1919
1920         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
1921         printk(KERN_INFO "PMNC  =0x%08x\n", val);
1922
1923         asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
1924         printk(KERN_INFO "CNTENS=0x%08x\n", val);
1925
1926         asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
1927         printk(KERN_INFO "INTENS=0x%08x\n", val);
1928
1929         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1930         printk(KERN_INFO "FLAGS =0x%08x\n", val);
1931
1932         asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
1933         printk(KERN_INFO "SELECT=0x%08x\n", val);
1934
1935         asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
1936         printk(KERN_INFO "CCNT  =0x%08x\n", val);
1937
1938         for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
1939                 armv7_pmnc_select_counter(cnt);
1940                 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
1941                 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
1942                         cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1943                 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
1944                 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
1945                         cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1946         }
1947 }
1948 #endif
1949
1950 static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1951 {
1952         unsigned long flags;
1953
1954         /*
1955          * Enable counter and interrupt, and set the counter to count
1956          * the event that we're interested in.
1957          */
1958         spin_lock_irqsave(&pmu_lock, flags);
1959
1960         /*
1961          * Disable counter
1962          */
1963         armv7_pmnc_disable_counter(idx);
1964
1965         /*
1966          * Set event (if destined for PMNx counters)
1967          * We don't need to set the event if it's a cycle count
1968          */
1969         if (idx != ARMV7_CYCLE_COUNTER)
1970                 armv7_pmnc_write_evtsel(idx, hwc->config_base);
1971
1972         /*
1973          * Enable interrupt for this counter
1974          */
1975         armv7_pmnc_enable_intens(idx);
1976
1977         /*
1978          * Enable counter
1979          */
1980         armv7_pmnc_enable_counter(idx);
1981
1982         spin_unlock_irqrestore(&pmu_lock, flags);
1983 }
1984
1985 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1986 {
1987         unsigned long flags;
1988
1989         /*
1990          * Disable counter and interrupt
1991          */
1992         spin_lock_irqsave(&pmu_lock, flags);
1993
1994         /*
1995          * Disable counter
1996          */
1997         armv7_pmnc_disable_counter(idx);
1998
1999         /*
2000          * Disable interrupt for this counter
2001          */
2002         armv7_pmnc_disable_intens(idx);
2003
2004         spin_unlock_irqrestore(&pmu_lock, flags);
2005 }
2006
2007 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
2008 {
2009         unsigned long pmnc;
2010         struct perf_sample_data data;
2011         struct cpu_hw_events *cpuc;
2012         struct pt_regs *regs;
2013         int idx;
2014
2015         /*
2016          * Get and reset the IRQ flags
2017          */
2018         pmnc = armv7_pmnc_getreset_flags();
2019
2020         /*
2021          * Did an overflow occur?
2022          */
2023         if (!armv7_pmnc_has_overflowed(pmnc))
2024                 return IRQ_NONE;
2025
2026         /*
2027          * Handle the counter(s) overflow(s)
2028          */
2029         regs = get_irq_regs();
2030
2031         perf_sample_data_init(&data, 0);
2032
2033         cpuc = &__get_cpu_var(cpu_hw_events);
2034         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2035                 struct perf_event *event = cpuc->events[idx];
2036                 struct hw_perf_event *hwc;
2037
2038                 if (!test_bit(idx, cpuc->active_mask))
2039                         continue;
2040
2041                 /*
2042                  * We have a single interrupt for all counters. Check that
2043                  * each counter has overflowed before we process it.
2044                  */
2045                 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2046                         continue;
2047
2048                 hwc = &event->hw;
2049                 armpmu_event_update(event, hwc, idx);
2050                 data.period = event->hw.last_period;
2051                 if (!armpmu_event_set_period(event, hwc, idx))
2052                         continue;
2053
2054                 if (perf_event_overflow(event, 0, &data, regs))
2055                         armpmu->disable(hwc, idx);
2056         }
2057
2058         /*
2059          * Handle the pending perf events.
2060          *
2061          * Note: this call *must* be run with interrupts enabled. For
2062          * platforms that can have the PMU interrupts raised as a PMI, this
2063          * will not work.
2064          */
2065         perf_event_do_pending();
2066
2067         return IRQ_HANDLED;
2068 }
2069
2070 static void armv7pmu_start(void)
2071 {
2072         unsigned long flags;
2073
2074         spin_lock_irqsave(&pmu_lock, flags);
2075         /* Enable all counters */
2076         armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2077         spin_unlock_irqrestore(&pmu_lock, flags);
2078 }
2079
2080 static void armv7pmu_stop(void)
2081 {
2082         unsigned long flags;
2083
2084         spin_lock_irqsave(&pmu_lock, flags);
2085         /* Disable all counters */
2086         armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2087         spin_unlock_irqrestore(&pmu_lock, flags);
2088 }
2089
2090 static inline int armv7_a8_pmu_event_map(int config)
2091 {
2092         int mapping = armv7_a8_perf_map[config];
2093         if (HW_OP_UNSUPPORTED == mapping)
2094                 mapping = -EOPNOTSUPP;
2095         return mapping;
2096 }
2097
2098 static inline int armv7_a9_pmu_event_map(int config)
2099 {
2100         int mapping = armv7_a9_perf_map[config];
2101         if (HW_OP_UNSUPPORTED == mapping)
2102                 mapping = -EOPNOTSUPP;
2103         return mapping;
2104 }
2105
2106 static u64 armv7pmu_raw_event(u64 config)
2107 {
2108         return config & 0xff;
2109 }
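     /*
      * Only the low byte of a raw config is kept; it ends up in EVTSEL via
      * armv7pmu_enable_event(). Purely as an illustration (not something
      * this file defines), a user could request an implementation-specific
      * ARMv7 event by number with e.g.:
      *
      *	perf stat -e r<hex event number> -- <command>
      */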
2110
2111 static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2112                                   struct hw_perf_event *event)
2113 {
2114         int idx;
2115
2116         /* Always place a cycle-counting event onto the cycle counter. */
2117         if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2118                 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2119                         return -EAGAIN;
2120
2121                 return ARMV7_CYCLE_COUNTER;
2122         } else {
2123                 /*
2124                  * For anything other than a cycle count, try to use
2125                  * one of the event counters.
2126                  */
2127                 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2128                         if (!test_and_set_bit(idx, cpuc->used_mask))
2129                                 return idx;
2130                 }
2131
2132                 /* The counters are all in use. */
2133                 return -EAGAIN;
2134         }
2135 }
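     /*
      * Allocation policy in short: cycle-count events are pinned to the
      * dedicated cycle counter, everything else takes the first free CNTx
      * found in used_mask, and -EAGAIN tells the core that all counters are
      * currently busy.
      */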
2136
2137 static struct arm_pmu armv7pmu = {
2138         .handle_irq             = armv7pmu_handle_irq,
2139         .enable                 = armv7pmu_enable_event,
2140         .disable                = armv7pmu_disable_event,
2141         .raw_event              = armv7pmu_raw_event,
2142         .read_counter           = armv7pmu_read_counter,
2143         .write_counter          = armv7pmu_write_counter,
2144         .get_event_idx          = armv7pmu_get_event_idx,
2145         .start                  = armv7pmu_start,
2146         .stop                   = armv7pmu_stop,
2147         .max_period             = (1LLU << 32) - 1,
2148 };
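     /*
      * The remaining fields (id, event_map and num_events) are filled in by
      * init_hw_perf_events() once the exact ARMv7 implementation (Cortex-A8
      * or Cortex-A9) has been identified from the CPU ID register.
      */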
2149
2150 static u32 __init armv7_reset_read_pmnc(void)
2151 {
2152         u32 nb_cnt;
2153
2154         /* Initialize & Reset PMNC: C and P bits */
2155         armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2156
2157         /* Read the nb of CNTx counters supported from PMNC */
2158         nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2159
2160         /* Add the CPU cycles counter and return */
2161         return nb_cnt + 1;
2162 }
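     /*
      * For reference: on a Cortex-A8, which implements four event counters
      * in addition to the cycle counter, the N field reads as 4 and this
      * function returns 5.
      */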
2163
2164 /*
2165  * ARMv5 [xscale] Performance counter handling code.
2166  *
2167  * Based on xscale OProfile code.
2168  *
2169  * There are two variants of the xscale PMU that we support:
2170  *      - xscale1pmu: 2 event counters and a cycle counter
2171  *      - xscale2pmu: 4 event counters and a cycle counter
2172  * The two variants share event definitions, but have different
2173  * PMU structures.
2174  */
2175
2176 enum xscale_perf_types {
2177         XSCALE_PERFCTR_ICACHE_MISS              = 0x00,
2178         XSCALE_PERFCTR_ICACHE_NO_DELIVER        = 0x01,
2179         XSCALE_PERFCTR_DATA_STALL               = 0x02,
2180         XSCALE_PERFCTR_ITLB_MISS                = 0x03,
2181         XSCALE_PERFCTR_DTLB_MISS                = 0x04,
2182         XSCALE_PERFCTR_BRANCH                   = 0x05,
2183         XSCALE_PERFCTR_BRANCH_MISS              = 0x06,
2184         XSCALE_PERFCTR_INSTRUCTION              = 0x07,
2185         XSCALE_PERFCTR_DCACHE_FULL_STALL        = 0x08,
2186         XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
2187         XSCALE_PERFCTR_DCACHE_ACCESS            = 0x0A,
2188         XSCALE_PERFCTR_DCACHE_MISS              = 0x0B,
2189         XSCALE_PERFCTR_DCACHE_WRITE_BACK        = 0x0C,
2190         XSCALE_PERFCTR_PC_CHANGED               = 0x0D,
2191         XSCALE_PERFCTR_BCU_REQUEST              = 0x10,
2192         XSCALE_PERFCTR_BCU_FULL                 = 0x11,
2193         XSCALE_PERFCTR_BCU_DRAIN                = 0x12,
2194         XSCALE_PERFCTR_BCU_ECC_NO_ELOG          = 0x14,
2195         XSCALE_PERFCTR_BCU_1_BIT_ERR            = 0x15,
2196         XSCALE_PERFCTR_RMW                      = 0x16,
2197         /* XSCALE_PERFCTR_CCNT is not hardware defined */
2198         XSCALE_PERFCTR_CCNT                     = 0xFE,
2199         XSCALE_PERFCTR_UNUSED                   = 0xFF,
2200 };
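     /*
      * XSCALE_PERFCTR_CCNT and XSCALE_PERFCTR_UNUSED are software-only
      * values: the former routes cycle-count requests to the dedicated clock
      * counter in the get_event_idx callbacks, and the latter is what the
      * disable paths program into a freed counter's event-select field,
      * presumably so it no longer counts a stale event.
      */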
2201
2202 enum xscale_counters {
2203         XSCALE_CYCLE_COUNTER    = 1,
2204         XSCALE_COUNTER0,
2205         XSCALE_COUNTER1,
2206         XSCALE_COUNTER2,
2207         XSCALE_COUNTER3,
2208 };
2209
2210 static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2211         [PERF_COUNT_HW_CPU_CYCLES]          = XSCALE_PERFCTR_CCNT,
2212         [PERF_COUNT_HW_INSTRUCTIONS]        = XSCALE_PERFCTR_INSTRUCTION,
2213         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
2214         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
2215         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2216         [PERF_COUNT_HW_BRANCH_MISSES]       = XSCALE_PERFCTR_BRANCH_MISS,
2217         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
2218 };
2219
2220 static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2221                                            [PERF_COUNT_HW_CACHE_OP_MAX]
2222                                            [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2223         [C(L1D)] = {
2224                 [C(OP_READ)] = {
2225                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2226                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2227                 },
2228                 [C(OP_WRITE)] = {
2229                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2230                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2231                 },
2232                 [C(OP_PREFETCH)] = {
2233                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2234                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2235                 },
2236         },
2237         [C(L1I)] = {
2238                 [C(OP_READ)] = {
2239                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2240                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2241                 },
2242                 [C(OP_WRITE)] = {
2243                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2244                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2245                 },
2246                 [C(OP_PREFETCH)] = {
2247                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2248                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2249                 },
2250         },
2251         [C(LL)] = {
2252                 [C(OP_READ)] = {
2253                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2254                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2255                 },
2256                 [C(OP_WRITE)] = {
2257                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2258                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2259                 },
2260                 [C(OP_PREFETCH)] = {
2261                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2262                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2263                 },
2264         },
2265         [C(DTLB)] = {
2266                 [C(OP_READ)] = {
2267                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2268                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2269                 },
2270                 [C(OP_WRITE)] = {
2271                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2272                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2273                 },
2274                 [C(OP_PREFETCH)] = {
2275                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2276                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2277                 },
2278         },
2279         [C(ITLB)] = {
2280                 [C(OP_READ)] = {
2281                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2282                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2283                 },
2284                 [C(OP_WRITE)] = {
2285                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2286                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2287                 },
2288                 [C(OP_PREFETCH)] = {
2289                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2290                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2291                 },
2292         },
2293         [C(BPU)] = {
2294                 [C(OP_READ)] = {
2295                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2296                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2297                 },
2298                 [C(OP_WRITE)] = {
2299                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2300                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2301                 },
2302                 [C(OP_PREFETCH)] = {
2303                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2304                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2305                 },
2306         },
2307 };
2308
2309 #define XSCALE_PMU_ENABLE       0x001
2310 #define XSCALE_PMN_RESET        0x002
2311 #define XSCALE_CCNT_RESET       0x004
2312 #define XSCALE_PMU_RESET        (XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
2313 #define XSCALE_PMU_CNT64        0x008
2314
2315 static inline int
2316 xscalepmu_event_map(int config)
2317 {
2318         int mapping = xscale_perf_map[config];
2319         if (HW_OP_UNSUPPORTED == mapping)
2320                 mapping = -EOPNOTSUPP;
2321         return mapping;
2322 }
2323
2324 static u64
2325 xscalepmu_raw_event(u64 config)
2326 {
2327         return config & 0xff;
2328 }
2329
2330 #define XSCALE1_OVERFLOWED_MASK 0x700
2331 #define XSCALE1_CCOUNT_OVERFLOW 0x400
2332 #define XSCALE1_COUNT0_OVERFLOW 0x100
2333 #define XSCALE1_COUNT1_OVERFLOW 0x200
2334 #define XSCALE1_CCOUNT_INT_EN   0x040
2335 #define XSCALE1_COUNT0_INT_EN   0x010
2336 #define XSCALE1_COUNT1_INT_EN   0x020
2337 #define XSCALE1_COUNT0_EVT_SHFT 12
2338 #define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
2339 #define XSCALE1_COUNT1_EVT_SHFT 20
2340 #define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
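     /*
      * On xscale1 all of this state lives in the single PMNC register: the
      * overflow flags sit in bits [10:8], the interrupt enables in bits
      * [6:4] and the two event-select fields at bits [19:12] and [27:20].
      * That is why the enable/disable routines below are simple
      * read-modify-write operations on PMNC alone.
      */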
2341
2342 static inline u32
2343 xscale1pmu_read_pmnc(void)
2344 {
2345         u32 val;
2346         asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2347         return val;
2348 }
2349
2350 static inline void
2351 xscale1pmu_write_pmnc(u32 val)
2352 {
2353         /* upper 4 bits and bits 7 and 11 are write-as-0 */
2354         val &= 0xffff77f;
2355         asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2356 }
2357
2358 static inline int
2359 xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2360                                         enum xscale_counters counter)
2361 {
2362         int ret = 0;
2363
2364         switch (counter) {
2365         case XSCALE_CYCLE_COUNTER:
2366                 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2367                 break;
2368         case XSCALE_COUNTER0:
2369                 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2370                 break;
2371         case XSCALE_COUNTER1:
2372                 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2373                 break;
2374         default:
2375                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2376         }
2377
2378         return ret;
2379 }
2380
2381 static irqreturn_t
2382 xscale1pmu_handle_irq(int irq_num, void *dev)
2383 {
2384         unsigned long pmnc;
2385         struct perf_sample_data data;
2386         struct cpu_hw_events *cpuc;
2387         struct pt_regs *regs;
2388         int idx;
2389
2390         /*
2391          * NOTE: there's an A stepping erratum that states if an overflow
2392          *       bit already exists and another occurs, the previous
2393          *       Overflow bit gets cleared. There's no workaround.
2394          *       Fixed in B stepping or later.
2395          */
2396         pmnc = xscale1pmu_read_pmnc();
2397
2398         /*
2399          * Write the value back to clear the overflow flags. Overflow
2400          * flags remain in pmnc for use below. We also disable the PMU
2401          * while we process the interrupt.
2402          */
2403         xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2404
2405         if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2406                 return IRQ_NONE;
2407
2408         regs = get_irq_regs();
2409
2410         perf_sample_data_init(&data, 0);
2411
2412         cpuc = &__get_cpu_var(cpu_hw_events);
2413         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2414                 struct perf_event *event = cpuc->events[idx];
2415                 struct hw_perf_event *hwc;
2416
2417                 if (!test_bit(idx, cpuc->active_mask))
2418                         continue;
2419
2420                 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2421                         continue;
2422
2423                 hwc = &event->hw;
2424                 armpmu_event_update(event, hwc, idx);
2425                 data.period = event->hw.last_period;
2426                 if (!armpmu_event_set_period(event, hwc, idx))
2427                         continue;
2428
2429                 if (perf_event_overflow(event, 0, &data, regs))
2430                         armpmu->disable(hwc, idx);
2431         }
2432
2433         perf_event_do_pending();
2434
2435         /*
2436          * Re-enable the PMU.
2437          */
2438         pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2439         xscale1pmu_write_pmnc(pmnc);
2440
2441         return IRQ_HANDLED;
2442 }
2443
2444 static void
2445 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2446 {
2447         unsigned long val, mask, evt, flags;
2448
2449         switch (idx) {
2450         case XSCALE_CYCLE_COUNTER:
2451                 mask = 0;
2452                 evt = XSCALE1_CCOUNT_INT_EN;
2453                 break;
2454         case XSCALE_COUNTER0:
2455                 mask = XSCALE1_COUNT0_EVT_MASK;
2456                 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2457                         XSCALE1_COUNT0_INT_EN;
2458                 break;
2459         case XSCALE_COUNTER1:
2460                 mask = XSCALE1_COUNT1_EVT_MASK;
2461                 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2462                         XSCALE1_COUNT1_INT_EN;
2463                 break;
2464         default:
2465                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2466                 return;
2467         }
2468
2469         spin_lock_irqsave(&pmu_lock, flags);
2470         val = xscale1pmu_read_pmnc();
2471         val &= ~mask;
2472         val |= evt;
2473         xscale1pmu_write_pmnc(val);
2474         spin_unlock_irqrestore(&pmu_lock, flags);
2475 }
2476
2477 static void
2478 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2479 {
2480         unsigned long val, mask, evt, flags;
2481
2482         switch (idx) {
2483         case XSCALE_CYCLE_COUNTER:
2484                 mask = XSCALE1_CCOUNT_INT_EN;
2485                 evt = 0;
2486                 break;
2487         case XSCALE_COUNTER0:
2488                 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2489                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2490                 break;
2491         case XSCALE_COUNTER1:
2492                 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2493                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2494                 break;
2495         default:
2496                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2497                 return;
2498         }
2499
2500         spin_lock_irqsave(&pmu_lock, flags);
2501         val = xscale1pmu_read_pmnc();
2502         val &= ~mask;
2503         val |= evt;
2504         xscale1pmu_write_pmnc(val);
2505         spin_unlock_irqrestore(&pmu_lock, flags);
2506 }
2507
2508 static int
2509 xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2510                         struct hw_perf_event *event)
2511 {
2512         if (XSCALE_PERFCTR_CCNT == event->config_base) {
2513                 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2514                         return -EAGAIN;
2515
2516                 return XSCALE_CYCLE_COUNTER;
2517         } else {
2518                 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2519                         return XSCALE_COUNTER1;
2520                 }
2521
2522                 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2523                         return XSCALE_COUNTER0;
2524                 }
2525
2526                 return -EAGAIN;
2527         }
2528 }
2529
2530 static void
2531 xscale1pmu_start(void)
2532 {
2533         unsigned long flags, val;
2534
2535         spin_lock_irqsave(&pmu_lock, flags);
2536         val = xscale1pmu_read_pmnc();
2537         val |= XSCALE_PMU_ENABLE;
2538         xscale1pmu_write_pmnc(val);
2539         spin_unlock_irqrestore(&pmu_lock, flags);
2540 }
2541
2542 static void
2543 xscale1pmu_stop(void)
2544 {
2545         unsigned long flags, val;
2546
2547         spin_lock_irqsave(&pmu_lock, flags);
2548         val = xscale1pmu_read_pmnc();
2549         val &= ~XSCALE_PMU_ENABLE;
2550         xscale1pmu_write_pmnc(val);
2551         spin_unlock_irqrestore(&pmu_lock, flags);
2552 }
2553
2554 static inline u32
2555 xscale1pmu_read_counter(int counter)
2556 {
2557         u32 val = 0;
2558
2559         switch (counter) {
2560         case XSCALE_CYCLE_COUNTER:
2561                 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2562                 break;
2563         case XSCALE_COUNTER0:
2564                 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2565                 break;
2566         case XSCALE_COUNTER1:
2567                 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2568                 break;
2569         }
2570
2571         return val;
2572 }
2573
2574 static inline void
2575 xscale1pmu_write_counter(int counter, u32 val)
2576 {
2577         switch (counter) {
2578         case XSCALE_CYCLE_COUNTER:
2579                 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2580                 break;
2581         case XSCALE_COUNTER0:
2582                 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2583                 break;
2584         case XSCALE_COUNTER1:
2585                 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2586                 break;
2587         }
2588 }
2589
2590 static const struct arm_pmu xscale1pmu = {
2591         .id             = ARM_PERF_PMU_ID_XSCALE1,
2592         .handle_irq     = xscale1pmu_handle_irq,
2593         .enable         = xscale1pmu_enable_event,
2594         .disable        = xscale1pmu_disable_event,
2595         .event_map      = xscalepmu_event_map,
2596         .raw_event      = xscalepmu_raw_event,
2597         .read_counter   = xscale1pmu_read_counter,
2598         .write_counter  = xscale1pmu_write_counter,
2599         .get_event_idx  = xscale1pmu_get_event_idx,
2600         .start          = xscale1pmu_start,
2601         .stop           = xscale1pmu_stop,
2602         .num_events     = 3,
2603         .max_period     = (1LLU << 32) - 1,
2604 };
2605
2606 #define XSCALE2_OVERFLOWED_MASK 0x01f
2607 #define XSCALE2_CCOUNT_OVERFLOW 0x001
2608 #define XSCALE2_COUNT0_OVERFLOW 0x002
2609 #define XSCALE2_COUNT1_OVERFLOW 0x004
2610 #define XSCALE2_COUNT2_OVERFLOW 0x008
2611 #define XSCALE2_COUNT3_OVERFLOW 0x010
2612 #define XSCALE2_CCOUNT_INT_EN   0x001
2613 #define XSCALE2_COUNT0_INT_EN   0x002
2614 #define XSCALE2_COUNT1_INT_EN   0x004
2615 #define XSCALE2_COUNT2_INT_EN   0x008
2616 #define XSCALE2_COUNT3_INT_EN   0x010
2617 #define XSCALE2_COUNT0_EVT_SHFT 0
2618 #define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
2619 #define XSCALE2_COUNT1_EVT_SHFT 8
2620 #define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
2621 #define XSCALE2_COUNT2_EVT_SHFT 16
2622 #define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
2623 #define XSCALE2_COUNT3_EVT_SHFT 24
2624 #define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
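     /*
      * Unlike xscale1, xscale2 spreads the PMU state across several
      * coprocessor registers: overflow flags, interrupt enables and event
      * selects each have their own register (see the accessors below), with
      * one bit per counter in the flag/enable registers and one byte per
      * event counter packed into the event-select register.
      */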
2625
2626 static inline u32
2627 xscale2pmu_read_pmnc(void)
2628 {
2629         u32 val;
2630         asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2631         /* bits 1-2 and 4-23 are read-unpredictable */
2632         return val & 0xff000009;
2633 }
2634
2635 static inline void
2636 xscale2pmu_write_pmnc(u32 val)
2637 {
2638         /* bits 4-23 are write-as-0, 24-31 are write ignored */
2639         val &= 0xf;
2640         asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2641 }
2642
2643 static inline u32
2644 xscale2pmu_read_overflow_flags(void)
2645 {
2646         u32 val;
2647         asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2648         return val;
2649 }
2650
2651 static inline void
2652 xscale2pmu_write_overflow_flags(u32 val)
2653 {
2654         asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2655 }
2656
2657 static inline u32
2658 xscale2pmu_read_event_select(void)
2659 {
2660         u32 val;
2661         asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2662         return val;
2663 }
2664
2665 static inline void
2666 xscale2pmu_write_event_select(u32 val)
2667 {
2668         asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2669 }
2670
2671 static inline u32
2672 xscale2pmu_read_int_enable(void)
2673 {
2674         u32 val;
2675         asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2676         return val;
2677 }
2678
2679 static void
2680 xscale2pmu_write_int_enable(u32 val)
2681 {
2682         asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2683 }
2684
2685 static inline int
2686 xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2687                                         enum xscale_counters counter)
2688 {
2689         int ret = 0;
2690
2691         switch (counter) {
2692         case XSCALE_CYCLE_COUNTER:
2693                 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2694                 break;
2695         case XSCALE_COUNTER0:
2696                 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2697                 break;
2698         case XSCALE_COUNTER1:
2699                 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2700                 break;
2701         case XSCALE_COUNTER2:
2702                 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2703                 break;
2704         case XSCALE_COUNTER3:
2705                 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2706                 break;
2707         default:
2708                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2709         }
2710
2711         return ret;
2712 }
2713
2714 static irqreturn_t
2715 xscale2pmu_handle_irq(int irq_num, void *dev)
2716 {
2717         unsigned long pmnc, of_flags;
2718         struct perf_sample_data data;
2719         struct cpu_hw_events *cpuc;
2720         struct pt_regs *regs;
2721         int idx;
2722
2723         /* Disable the PMU. */
2724         pmnc = xscale2pmu_read_pmnc();
2725         xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2726
2727         /* Check the overflow flag register. */
2728         of_flags = xscale2pmu_read_overflow_flags();
2729         if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2730                 return IRQ_NONE;
2731
2732         /* Clear the overflow bits. */
2733         xscale2pmu_write_overflow_flags(of_flags);
2734
2735         regs = get_irq_regs();
2736
2737         perf_sample_data_init(&data, 0);
2738
2739         cpuc = &__get_cpu_var(cpu_hw_events);
2740         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2741                 struct perf_event *event = cpuc->events[idx];
2742                 struct hw_perf_event *hwc;
2743
2744                 if (!test_bit(idx, cpuc->active_mask))
2745                         continue;
2746
2747                 if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
2748                         continue;
2749
2750                 hwc = &event->hw;
2751                 armpmu_event_update(event, hwc, idx);
2752                 data.period = event->hw.last_period;
2753                 if (!armpmu_event_set_period(event, hwc, idx))
2754                         continue;
2755
2756                 if (perf_event_overflow(event, 0, &data, regs))
2757                         armpmu->disable(hwc, idx);
2758         }
2759
2760         perf_event_do_pending();
2761
2762         /*
2763          * Re-enable the PMU.
2764          */
2765         pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2766         xscale2pmu_write_pmnc(pmnc);
2767
2768         return IRQ_HANDLED;
2769 }
2770
2771 static void
2772 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2773 {
2774         unsigned long flags, ien, evtsel;
2775
2776         ien = xscale2pmu_read_int_enable();
2777         evtsel = xscale2pmu_read_event_select();
2778
2779         switch (idx) {
2780         case XSCALE_CYCLE_COUNTER:
2781                 ien |= XSCALE2_CCOUNT_INT_EN;
2782                 break;
2783         case XSCALE_COUNTER0:
2784                 ien |= XSCALE2_COUNT0_INT_EN;
2785                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2786                 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2787                 break;
2788         case XSCALE_COUNTER1:
2789                 ien |= XSCALE2_COUNT1_INT_EN;
2790                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2791                 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2792                 break;
2793         case XSCALE_COUNTER2:
2794                 ien |= XSCALE2_COUNT2_INT_EN;
2795                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2796                 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2797                 break;
2798         case XSCALE_COUNTER3:
2799                 ien |= XSCALE2_COUNT3_INT_EN;
2800                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2801                 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2802                 break;
2803         default:
2804                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2805                 return;
2806         }
2807
2808         spin_lock_irqsave(&pmu_lock, flags);
2809         xscale2pmu_write_event_select(evtsel);
2810         xscale2pmu_write_int_enable(ien);
2811         spin_unlock_irqrestore(&pmu_lock, flags);
2812 }
2813
2814 static void
2815 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2816 {
2817         unsigned long flags, ien, evtsel;
2818
2819         ien = xscale2pmu_read_int_enable();
2820         evtsel = xscale2pmu_read_event_select();
2821
2822         switch (idx) {
2823         case XSCALE_CYCLE_COUNTER:
2824                 ien &= ~XSCALE2_CCOUNT_INT_EN;
2825                 break;
2826         case XSCALE_COUNTER0:
2827                 ien &= ~XSCALE2_COUNT0_INT_EN;
2828                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2829                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2830                 break;
2831         case XSCALE_COUNTER1:
2832                 ien &= ~XSCALE2_COUNT1_INT_EN;
2833                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2834                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2835                 break;
2836         case XSCALE_COUNTER2:
2837                 ien &= ~XSCALE2_COUNT2_INT_EN;
2838                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2839                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2840                 break;
2841         case XSCALE_COUNTER3:
2842                 ien &= ~XSCALE2_COUNT3_INT_EN;
2843                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2844                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2845                 break;
2846         default:
2847                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2848                 return;
2849         }
2850
2851         spin_lock_irqsave(&pmu_lock, flags);
2852         xscale2pmu_write_event_select(evtsel);
2853         xscale2pmu_write_int_enable(ien);
2854         spin_unlock_irqrestore(&pmu_lock, flags);
2855 }
2856
2857 static int
2858 xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2859                         struct hw_perf_event *event)
2860 {
2861         int idx = xscale1pmu_get_event_idx(cpuc, event);
2862         if (idx >= 0)
2863                 goto out;
2864
2865         if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2866                 idx = XSCALE_COUNTER3;
2867         else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2868                 idx = XSCALE_COUNTER2;
2869 out:
2870         return idx;
2871 }
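     /*
      * Counter allocation on xscale2 reuses the xscale1 policy for the cycle
      * counter and counters 0/1, then falls back to counters 3 and 2 once
      * those are taken.
      */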
2872
2873 static void
2874 xscale2pmu_start(void)
2875 {
2876         unsigned long flags, val;
2877
2878         spin_lock_irqsave(&pmu_lock, flags);
2879         val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2880         val |= XSCALE_PMU_ENABLE;
2881         xscale2pmu_write_pmnc(val);
2882         spin_unlock_irqrestore(&pmu_lock, flags);
2883 }
2884
2885 static void
2886 xscale2pmu_stop(void)
2887 {
2888         unsigned long flags, val;
2889
2890         spin_lock_irqsave(&pmu_lock, flags);
2891         val = xscale2pmu_read_pmnc();
2892         val &= ~XSCALE_PMU_ENABLE;
2893         xscale2pmu_write_pmnc(val);
2894         spin_unlock_irqrestore(&pmu_lock, flags);
2895 }
2896
2897 static inline u32
2898 xscale2pmu_read_counter(int counter)
2899 {
2900         u32 val = 0;
2901
2902         switch (counter) {
2903         case XSCALE_CYCLE_COUNTER:
2904                 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2905                 break;
2906         case XSCALE_COUNTER0:
2907                 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2908                 break;
2909         case XSCALE_COUNTER1:
2910                 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2911                 break;
2912         case XSCALE_COUNTER2:
2913                 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2914                 break;
2915         case XSCALE_COUNTER3:
2916                 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2917                 break;
2918         }
2919
2920         return val;
2921 }
2922
2923 static inline void
2924 xscale2pmu_write_counter(int counter, u32 val)
2925 {
2926         switch (counter) {
2927         case XSCALE_CYCLE_COUNTER:
2928                 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2929                 break;
2930         case XSCALE_COUNTER0:
2931                 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2932                 break;
2933         case XSCALE_COUNTER1:
2934                 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2935                 break;
2936         case XSCALE_COUNTER2:
2937                 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2938                 break;
2939         case XSCALE_COUNTER3:
2940                 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2941                 break;
2942         }
2943 }
2944
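     /* XScale2: a cycle counter plus four event counters (XSCALE_COUNTER0..3). */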
2945 static const struct arm_pmu xscale2pmu = {
2946         .id             = ARM_PERF_PMU_ID_XSCALE2,
2947         .handle_irq     = xscale2pmu_handle_irq,
2948         .enable         = xscale2pmu_enable_event,
2949         .disable        = xscale2pmu_disable_event,
2950         .event_map      = xscalepmu_event_map,
2951         .raw_event      = xscalepmu_raw_event,
2952         .read_counter   = xscale2pmu_read_counter,
2953         .write_counter  = xscale2pmu_write_counter,
2954         .get_event_idx  = xscale2pmu_get_event_idx,
2955         .start          = xscale2pmu_start,
2956         .stop           = xscale2pmu_stop,
2957         .num_events     = 5,
2958         .max_period     = (1LLU << 32) - 1,
2959 };
2960
2961 static int __init
2962 init_hw_perf_events(void)
2963 {
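             /*
              * Decode the CPU ID register: bits [31:24] give the implementer
              * (0x41 = ARM Ltd, 0x69 = Intel), bits [15:4] the part number.
              */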
2964         unsigned long cpuid = read_cpuid_id();
2965         unsigned long implementor = (cpuid & 0xFF000000) >> 24;
2966         unsigned long part_number = (cpuid & 0xFFF0);
2967
2968         /* ARM Ltd CPUs. */
2969         if (0x41 == implementor) {
2970                 switch (part_number) {
2971                 case 0xB360:    /* ARM1136 */
2972                 case 0xB560:    /* ARM1156 */
2973                 case 0xB760:    /* ARM1176 */
2974                         armpmu = &armv6pmu;
2975                         memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2976                                         sizeof(armv6_perf_cache_map));
2977                         perf_max_events = armv6pmu.num_events;
2978                         break;
2979                 case 0xB020:    /* ARM11mpcore */
2980                         armpmu = &armv6mpcore_pmu;
2981                         memcpy(armpmu_perf_cache_map,
2982                                armv6mpcore_perf_cache_map,
2983                                sizeof(armv6mpcore_perf_cache_map));
2984                         perf_max_events = armv6mpcore_pmu.num_events;
2985                         break;
2986                 case 0xC080:    /* Cortex-A8 */
2987                         armv7pmu.id = ARM_PERF_PMU_ID_CA8;
2988                         memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
2989                                 sizeof(armv7_a8_perf_cache_map));
2990                         armv7pmu.event_map = armv7_a8_pmu_event_map;
2991                         armpmu = &armv7pmu;
2992
2993                         /* Reset PMNC and read the number of CNTx
2994                            counters supported. */
2995                         armv7pmu.num_events = armv7_reset_read_pmnc();
2996                         perf_max_events = armv7pmu.num_events;
2997                         break;
2998                 case 0xC090:    /* Cortex-A9 */
2999                         armv7pmu.id = ARM_PERF_PMU_ID_CA9;
3000                         memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
3001                                 sizeof(armv7_a9_perf_cache_map));
3002                         armv7pmu.event_map = armv7_a9_pmu_event_map;
3003                         armpmu = &armv7pmu;
3004
3005                         /* Reset PMNC and read the number of CNTx
3006                            counters supported. */
3007                         armv7pmu.num_events = armv7_reset_read_pmnc();
3008                         perf_max_events = armv7pmu.num_events;
3009                         break;
3010                 }
3011         /* Intel CPUs [xscale]. */
3012         } else if (0x69 == implementor) {
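                     /* XScale parts encode the core generation in CPUID bits [15:13]. */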
3013                 part_number = (cpuid >> 13) & 0x7;
3014                 switch (part_number) {
3015                 case 1:
3016                         armpmu = &xscale1pmu;
3017                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
3018                                         sizeof(xscale_perf_cache_map));
3019                         perf_max_events = xscale1pmu.num_events;
3020                         break;
3021                 case 2:
3022                         armpmu = &xscale2pmu;
3023                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
3024                                         sizeof(xscale_perf_cache_map));
3025                         perf_max_events = xscale2pmu.num_events;
3026                         break;
3027                 }
3028         }
3029
3030         if (armpmu) {
3031                 pr_info("enabled with %s PMU driver, %d counters available\n",
3032                                 arm_pmu_names[armpmu->id], armpmu->num_events);
3033         } else {
3034                 pr_info("no hardware support available\n");
3035                 perf_max_events = -1;
3036         }
3037
3038         perf_pmu_register(&pmu);
3039
3040         return 0;
3041 }
3042 arch_initcall(init_hw_perf_events);
3043
3044 /*
3045  * Callchain handling code.
3046  */
3047
3048 /*
3049  * The registers we're interested in are at the end of the variable
3050  * length saved register structure. The fp points at the end of this
3051  * structure so the address of this struct is:
3052  * (struct frame_tail *)(xxx->fp)-1
3053  *
3054  * This code has been adapted from the ARM OProfile support.
3055  */
3056 struct frame_tail {
3057         struct frame_tail   *fp;
3058         unsigned long       sp;
3059         unsigned long       lr;
3060 } __attribute__((packed));
3061
3062 /*
3063  * Get the return address for a single stackframe and return a pointer to the
3064  * next frame tail.
3065  */
3066 static struct frame_tail *
3067 user_backtrace(struct frame_tail *tail,
3068                struct perf_callchain_entry *entry)
3069 {
3070         struct frame_tail buftail;
3071
3072         /* Also check accessibility of one struct frame_tail beyond */
3073         if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
3074                 return NULL;
3075         if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3076                 return NULL;
3077
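             /* The saved lr is the return address for this frame. */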
3078         perf_callchain_store(entry, buftail.lr);
3079
3080         /*
3081          * Frame pointers should strictly progress back up the stack
3082          * (towards higher addresses).
3083          */
3084         if (tail >= buftail.fp)
3085                 return NULL;
3086
3087         return buftail.fp - 1;
3088 }
3089
3090 void
3091 perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3092 {
3093         struct frame_tail *tail;
3094
3095
3096         tail = (struct frame_tail *)regs->ARM_fp - 1;
3097
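             /* Walk user frames until the chain ends or the pointer is misaligned. */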
3098         while (tail && !((unsigned long)tail & 0x3))
3099                 tail = user_backtrace(tail, entry);
3100 }
3101
3102 /*
3103  * Gets called by walk_stackframe() for every stackframe. This will be called
3107  * whilst unwinding the stackframe and is like a subroutine return so we use
3105  * the PC.
3106  */
3107 static int
3108 callchain_trace(struct stackframe *fr,
3109                 void *data)
3110 {
3111         struct perf_callchain_entry *entry = data;
3112         perf_callchain_store(entry, fr->pc);
3113         return 0;
3114 }
3115
3116 void
3117 perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
3118 {
3119         struct stackframe fr;
3120
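             /* Seed the unwinder with the register state at the time of the sample. */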
3121         fr.fp = regs->ARM_fp;
3122         fr.sp = regs->ARM_sp;
3123         fr.lr = regs->ARM_lr;
3124         fr.pc = regs->ARM_pc;
3125         walk_stackframe(&fr, callchain_trace, entry);
3126 }