/*
 * Performance counter support - powerpc architecture code
 *
 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_counter.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/reg.h>
#include <asm/pmc.h>
#include <asm/machdep.h>

struct cpu_hw_counters {
	int n_counters;
	int disabled;
	int n_added;
	struct perf_counter *counter[MAX_HWCOUNTERS];
	unsigned int events[MAX_HWCOUNTERS];
	u64 mmcr[3];
	u8 pmcs_enabled;
};
DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
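
/*
 * Description of the PMU on the CPU family we are running on; set once
 * at boot by init_perf_counters() below.
 */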
struct power_pmu *ppmu;

void perf_counter_print_debug(void)
{
}

/*
 * Return 1 for a software counter, 0 for a hardware counter
 */
static inline int is_software_counter(struct perf_counter *counter)
{
	return !counter->hw_event.raw && counter->hw_event.type < 0;
}
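
/*
 * Software counters never occupy a hardware PMC; the is_software_counter()
 * checks below filter them out so they are handled entirely by the
 * generic perf_counter code.
 */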

/*
 * Read one performance monitor counter (PMC).
 */
static unsigned long read_pmc(int idx)
{
	unsigned long val;

	switch (idx) {
	case 1:	val = mfspr(SPRN_PMC1); break;
	case 2:	val = mfspr(SPRN_PMC2); break;
	case 3:	val = mfspr(SPRN_PMC3); break;
	case 4:	val = mfspr(SPRN_PMC4); break;
	case 5:	val = mfspr(SPRN_PMC5); break;
	case 6:	val = mfspr(SPRN_PMC6); break;
	case 7:	val = mfspr(SPRN_PMC7); break;
	case 8:	val = mfspr(SPRN_PMC8); break;
	default:
		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
		val = 0;
	}
	return val;
}

/*
 * Write one PMC.
 */
static void write_pmc(int idx, unsigned long val)
{
	switch (idx) {
	case 1:	mtspr(SPRN_PMC1, val); break;
	case 2:	mtspr(SPRN_PMC2, val); break;
	case 3:	mtspr(SPRN_PMC3, val); break;
	case 4:	mtspr(SPRN_PMC4, val); break;
	case 5:	mtspr(SPRN_PMC5, val); break;
	case 6:	mtspr(SPRN_PMC6, val); break;
	case 7:	mtspr(SPRN_PMC7, val); break;
	case 8:	mtspr(SPRN_PMC8, val); break;
	default:
		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
	}
}
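
/*
 * Note that PMC numbers here are 1-based (PMC1..PMC8); the 0-based
 * indices returned by ppmu->compute_mmcr() are converted with the
 * "hwc_index[i] + 1" arithmetic in hw_perf_restore() below.
 */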

/*
 * Check if a set of events can all go on the PMU at once.
 * If they can't, this will look at alternative codes for the events
 * and see if any combination of alternative codes is feasible.
 * The feasible set is returned in event[].
 */
static int power_check_constraints(unsigned int event[], int n_ev)
{
	u64 mask, value, nv;
	unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
	u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
	u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
	u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
	int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
	int i, j;
	u64 addf = ppmu->add_fields;
	u64 tadd = ppmu->test_adder;
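
	/*
	 * Rough idea of the encoding (the per-CPU backends are the
	 * authoritative reference): each event gets a (mask, value) pair
	 * from ppmu->get_constraint().  Fields describing shared resources
	 * must match exactly, while "counting" fields are summed using
	 * ppmu->add_fields; ppmu->test_adder is chosen so that adding it
	 * to an over-committed counting field carries into bits covered
	 * by the masks, which is what the feasibility checks below detect.
	 */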

	if (n_ev > ppmu->n_counter)
		return -1;

	/* First see if the events will go on as-is */
	for (i = 0; i < n_ev; ++i) {
		alternatives[i][0] = event[i];
		if (ppmu->get_constraint(event[i], &amasks[i][0],
					 &avalues[i][0]))
			return -1;
		choice[i] = 0;
	}
	value = mask = 0;
	for (i = 0; i < n_ev; ++i) {
		nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
		if ((((nv + tadd) ^ value) & mask) != 0 ||
		    (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
			break;
		value = nv;
		mask |= amasks[i][0];
	}
	if (i == n_ev)
		return 0;	/* all OK */

	/* doesn't work, gather alternatives... */
	if (!ppmu->get_alternatives)
		return -1;
	for (i = 0; i < n_ev; ++i) {
		n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
		for (j = 1; j < n_alt[i]; ++j)
			ppmu->get_constraint(alternatives[i][j],
					     &amasks[i][j], &avalues[i][j]);
	}

	/* enumerate all possibilities and see if any will work */
	i = 0;
	j = -1;
	value = mask = nv = 0;
	while (1) {
		if (j >= 0) {
			/* we're backtracking, restore context */
			value = svalues[i];
			mask = smasks[i];
			j = choice[i];
		}

		/*
		 * See if any alternative k for event i,
		 * where k > j, will satisfy the constraints.
		 */
		while (++j < n_alt[i]) {
			nv = (value | avalues[i][j]) +
				(value & avalues[i][j] & addf);
			if ((((nv + tadd) ^ value) & mask) == 0 &&
			    (((nv + tadd) ^ avalues[i][j])
			     & amasks[i][j]) == 0)
				break;
		}
		if (j >= n_alt[i]) {
			/*
			 * No feasible alternative, backtrack
			 * to event i-1 and continue enumerating its
			 * alternatives from where we got up to.
			 */
			if (--i < 0)
				return -1;
		} else {
			/*
			 * Found a feasible alternative for event i,
			 * remember where we got up to with this event,
			 * go on to the next event, and start with
			 * the first alternative for it.
			 */
			choice[i] = j;
			svalues[i] = value;
			smasks[i] = mask;
			value = nv;
			mask |= amasks[i][j];
			if (++i >= n_ev)
				break;
			j = -1;
		}
	}

	/* OK, we have a feasible combination, tell the caller the solution */
	for (i = 0; i < n_ev; ++i)
		event[i] = alternatives[i][choice[i]];
	return 0;
}
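
/*
 * A rough sketch of what each CPU backend supplies, inferred from the
 * uses in this file (the authoritative definition is struct power_pmu):
 * n_counter, n_generic and generic_events[] describe the counters and
 * the generic event mapping; add_fields and test_adder drive the
 * constraint arithmetic above; get_constraint() and get_alternatives()
 * return per-event constraint data; compute_mmcr() and disable_pmc()
 * turn a chosen set of events into MMCR and PMC settings.
 */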

static void power_perf_read(struct perf_counter *counter)
{
	long val, delta, prev;

	if (!counter->hw.idx)
		return;
	/*
	 * Performance monitor interrupts come even when interrupts
	 * are soft-disabled, as long as interrupts are hard-enabled.
	 * Therefore we treat them like NMIs.
	 */
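	/*
	 * hw.prev_count can be moved forward underneath us by the PMU
	 * interrupt (record_and_restart() below), so publish the new
	 * base value with cmpxchg and retry if we raced.
	 */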
	do {
		prev = atomic64_read(&counter->hw.prev_count);
		barrier();
		val = read_pmc(counter->hw.idx);
	} while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);

	/* The counters are only 32 bits wide */
	delta = (val - prev) & 0xfffffffful;
	atomic64_add(delta, &counter->count);
	atomic64_sub(delta, &counter->hw.period_left);
}

/*
 * Disable all counters to prevent PMU interrupts and to allow
 * counters to be added or removed.
 */
u64 hw_perf_save_disable(void)
{
	struct cpu_hw_counters *cpuhw;
	unsigned long ret;
	unsigned long flags;

	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_counters);

	ret = cpuhw->disabled;
	if (!ret) {
		cpuhw->disabled = 1;
		cpuhw->n_added = 0;

		/*
		 * Check if we ever enabled the PMU on this cpu.
		 */
		if (!cpuhw->pmcs_enabled) {
			if (ppc_md.enable_pmcs)
				ppc_md.enable_pmcs();
			cpuhw->pmcs_enabled = 1;
		}

		/*
		 * Set the 'freeze counters' bit.
		 * The barrier is to make sure the mtspr has been
		 * executed and the PMU has frozen the counters
		 * before we return.
		 */
		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
		mb();
	}
	local_irq_restore(flags);
	return ret;
}
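
/*
 * A minimal usage sketch, as done by power_perf_enable() and
 * power_perf_disable() below:
 *
 *	pmudis = hw_perf_save_disable();
 *	... add or remove counters ...
 *	hw_perf_restore(pmudis);
 *
 * Because the previously-saved "disabled" state is passed back in,
 * nested disables only re-enable the PMU at the outermost restore.
 */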

/*
 * Re-enable all counters if disable == 0.
 * If we were previously disabled and counters were added, then
 * put the new config on the PMU.
 */
void hw_perf_restore(u64 disable)
{
	struct perf_counter *counter;
	struct cpu_hw_counters *cpuhw;
	unsigned long flags;
	long i;
	unsigned long val;
	s64 left;
	unsigned int hwc_index[MAX_HWCOUNTERS];

	if (disable)
		return;
	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_counters);
	cpuhw->disabled = 0;

	/*
	 * If we didn't change anything, or only removed counters,
	 * no need to recalculate MMCR* settings and reset the PMCs.
	 * Just reenable the PMU with the current MMCR* settings
	 * (possibly updated for removal of counters).
	 */
	if (!cpuhw->n_added) {
		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
		mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
		if (cpuhw->n_counters == 0)
			get_lppaca()->pmcregs_in_use = 0;
		goto out;
	}

	/*
	 * Compute MMCR* values for the new set of counters
	 */
	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
			       cpuhw->mmcr)) {
		/* shouldn't ever get here */
		printk(KERN_ERR "oops compute_mmcr failed\n");
		goto out;
	}

	/*
	 * Write the new configuration to MMCR* with the freeze
	 * bit set and set the hardware counters to their initial values.
	 * Then unfreeze the counters.
	 */
	get_lppaca()->pmcregs_in_use = 1;
	mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
				| MMCR0_FC);

	/*
	 * Read off any pre-existing counters that need to move
	 * to another PMC.
	 */
	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
			power_perf_read(counter);
			write_pmc(counter->hw.idx, 0);
			counter->hw.idx = 0;
		}
	}

	/*
	 * Initialize the PMCs for all the new and moved counters.
	 */
	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		if (counter->hw.idx)
			continue;
		val = 0;
		if (counter->hw_event.irq_period) {
			left = atomic64_read(&counter->hw.period_left);
			if (left < 0x80000000L)
				val = 0x80000000L - left;
		}
		atomic64_set(&counter->hw.prev_count, val);
		counter->hw.idx = hwc_index[i] + 1;
		write_pmc(counter->hw.idx, val);
	}

	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);

 out:
	local_irq_restore(flags);
}
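
/*
 * Note on the initial PMC values written above (and in
 * record_and_restart() below): a PMC raises an interrupt when its
 * 32-bit value goes negative, so loading it with 0x80000000 - left
 * makes it interrupt after roughly "left" more events.
 */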

static int collect_events(struct perf_counter *group, int max_count,
			  struct perf_counter *ctrs[], unsigned int *events)
{
	int n = 0;
	struct perf_counter *counter;

	if (!is_software_counter(group)) {
		if (n >= max_count)
			return -1;
		ctrs[n] = group;
		events[n++] = group->hw.config;
	}
	list_for_each_entry(counter, &group->sibling_list, list_entry) {
		if (!is_software_counter(counter) &&
		    counter->state != PERF_COUNTER_STATE_OFF) {
			if (n >= max_count)
				return -1;
			ctrs[n] = counter;
			events[n++] = counter->hw.config;
		}
	}
	return n;
}
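
/*
 * collect_events() is used both when scheduling in a whole group
 * (hw_perf_group_sched_in() below) and when a new counter is created
 * (hw_perf_counter_init()), to gather the hardware events that must be
 * checked against the PMU constraints together.
 */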

static void counter_sched_in(struct perf_counter *counter, int cpu)
{
	counter->state = PERF_COUNTER_STATE_ACTIVE;
	counter->oncpu = cpu;
	if (is_software_counter(counter))
		counter->hw_ops->enable(counter);
}

/*
 * Called to enable a whole group of counters.
 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
 * Assumes the caller has disabled interrupts and has
 * frozen the PMU with hw_perf_save_disable.
 */
int hw_perf_group_sched_in(struct perf_counter *group_leader,
	       struct perf_cpu_context *cpuctx,
	       struct perf_counter_context *ctx, int cpu)
{
	struct cpu_hw_counters *cpuhw;
	long i, n, n0;
	struct perf_counter *sub;

	cpuhw = &__get_cpu_var(cpu_hw_counters);
	n0 = cpuhw->n_counters;
	n = collect_events(group_leader, ppmu->n_counter - n0,
			   &cpuhw->counter[n0], &cpuhw->events[n0]);
	if (n < 0)
		return -EAGAIN;
	if (power_check_constraints(cpuhw->events, n + n0))
		return -EAGAIN;
	cpuhw->n_counters = n0 + n;
	cpuhw->n_added += n;

	/*
	 * OK, this group can go on; update counter states etc.,
	 * and enable any software counters
	 */
	for (i = n0; i < n0 + n; ++i)
		cpuhw->counter[i]->hw.config = cpuhw->events[i];

	counter_sched_in(group_leader, cpu);
	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
		if (sub->state != PERF_COUNTER_STATE_OFF) {
			counter_sched_in(sub, cpu);
		}
	}
	cpuctx->active_oncpu += n;

	return 1;
}

/*
 * Add a counter to the PMU.
 * If all counters are not already frozen, then we disable and
 * re-enable the PMU in order to get hw_perf_restore to do the
 * actual work of reconfiguring the PMU.
 */
static int power_perf_enable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuhw;
	unsigned long flags;
	u64 pmudis;
	int n0;
	int ret = -EAGAIN;

	local_irq_save(flags);
	pmudis = hw_perf_save_disable();

	/*
	 * Add the counter to the list (if there is room)
	 * and check whether the total set is still feasible.
	 */
	cpuhw = &__get_cpu_var(cpu_hw_counters);
	n0 = cpuhw->n_counters;
	if (n0 >= ppmu->n_counter)
		goto out;
	cpuhw->counter[n0] = counter;
	cpuhw->events[n0] = counter->hw.config;
	if (power_check_constraints(cpuhw->events, n0 + 1))
		goto out;

	counter->hw.config = cpuhw->events[n0];
	counter->hw.idx = 0;
	cpuhw->n_counters = n0 + 1;
	cpuhw->n_added++;
	ret = 0;
 out:
	hw_perf_restore(pmudis);
	local_irq_restore(flags);
	return ret;
}

/*
 * Remove a counter from the PMU.
 */
static void power_perf_disable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuhw;
	long i;
	u64 pmudis;
	unsigned long flags;

	local_irq_save(flags);
	pmudis = hw_perf_save_disable();

	power_perf_read(counter);

	cpuhw = &__get_cpu_var(cpu_hw_counters);
	for (i = 0; i < cpuhw->n_counters; ++i) {
		if (counter == cpuhw->counter[i]) {
			while (++i < cpuhw->n_counters)
				cpuhw->counter[i-1] = cpuhw->counter[i];
			--cpuhw->n_counters;
			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
			write_pmc(counter->hw.idx, 0);
			counter->hw.idx = 0;
			break;
		}
	}
	if (cpuhw->n_counters == 0) {
		/* disable exceptions if no counters are running */
		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
	}

	hw_perf_restore(pmudis);
	local_irq_restore(flags);
}

struct hw_perf_counter_ops power_perf_ops = {
	.enable = power_perf_enable,
	.disable = power_perf_disable,
	.read = power_perf_read
};

const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
	unsigned long ev;
	struct perf_counter *ctrs[MAX_HWCOUNTERS];
	unsigned int events[MAX_HWCOUNTERS];
	int n;

	if (!ppmu)
		return NULL;
	if ((s64)counter->hw_event.irq_period < 0)
		return NULL;
	ev = counter->hw_event.type;
	if (!counter->hw_event.raw) {
		if (ev >= ppmu->n_generic ||
		    ppmu->generic_events[ev] == 0)
			return NULL;
		ev = ppmu->generic_events[ev];
	}
	counter->hw.config_base = ev;
	counter->hw.idx = 0;

	/*
	 * If this is in a group, check if it can go on with all the
	 * other hardware counters in the group.  We assume the counter
	 * hasn't been linked into its leader's sibling list at this point.
	 */
	n = 0;
	if (counter->group_leader != counter) {
		n = collect_events(counter->group_leader, ppmu->n_counter - 1,
				   ctrs, events);
		if (n < 0)
			return NULL;
	}
	events[n++] = ev;
	if (power_check_constraints(events, n))
		return NULL;

	counter->hw.config = events[n - 1];
	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
	return &power_perf_ops;
}

void perf_counter_do_pending(void)
{
	int i;
	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
	struct perf_counter *counter;

	set_perf_counter_pending(0);
	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		if (counter && counter->wakeup_pending) {
			counter->wakeup_pending = 0;
			wake_up(&counter->waitq);
		}
	}
}

/*
 * Record data for an irq counter.
 * This function was lifted from the x86 code; maybe it should be
 * moved into the generic code at some point.
 */
static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
	struct perf_data *irqdata = counter->irqdata;

	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
		irqdata->overrun++;
	} else {
		u64 *p = (u64 *) &irqdata->data[irqdata->len];

		*p = data;
		irqdata->len += sizeof(u64);
	}
}

/*
 * Record all the values of the counters in a group
 */
static void perf_handle_group(struct perf_counter *counter)
{
	struct perf_counter *leader, *sub;

	leader = counter->group_leader;
	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
		if (sub != counter)
			sub->hw_ops->read(sub);
		perf_store_irq_data(counter, sub->hw_event.type);
		perf_store_irq_data(counter, atomic64_read(&sub->count));
	}
}
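
/*
 * The irqdata buffer thus ends up holding an (event type, count) pair
 * of u64 values for each counter in the group.
 */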

/*
 * A counter has overflowed; update its count and record
 * things if requested.  Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 */
static void record_and_restart(struct perf_counter *counter, long val,
			       struct pt_regs *regs)
{
	s64 prev, delta, left;
	int record = 0;

	/* we don't have to worry about interrupts here */
	prev = atomic64_read(&counter->hw.prev_count);
	delta = (val - prev) & 0xfffffffful;
	atomic64_add(delta, &counter->count);

	/*
	 * See if the total period for this counter has expired,
	 * and update for the next period.
	 */
	val = 0;
	left = atomic64_read(&counter->hw.period_left) - delta;
	if (counter->hw_event.irq_period) {
		if (left <= 0) {
			left += counter->hw_event.irq_period;
			if (left <= 0)
				left = counter->hw_event.irq_period;
			record = 1;
		}
		if (left < 0x80000000L)
			val = 0x80000000L - left;
	}
	write_pmc(counter->hw.idx, val);
	atomic64_set(&counter->hw.prev_count, val);
	atomic64_set(&counter->hw.period_left, left);

	/*
	 * Finally record data if requested.
	 */
	if (record) {
		switch (counter->hw_event.record_type) {
		case PERF_RECORD_SIMPLE:
			break;
		case PERF_RECORD_IRQ:
			perf_store_irq_data(counter, instruction_pointer(regs));
			counter->wakeup_pending = 1;
			break;
		case PERF_RECORD_GROUP:
			perf_handle_group(counter);
			counter->wakeup_pending = 1;
			break;
		}
	}
}

/*
 * Performance monitor interrupt stuff
 */
static void perf_counter_interrupt(struct pt_regs *regs)
{
	int i;
	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
	struct perf_counter *counter;
	long val;
	int need_wakeup = 0, found = 0;

	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		val = read_pmc(counter->hw.idx);
		if ((int)val < 0) {
			/* counter has overflowed */
			found = 1;
			record_and_restart(counter, val, regs);
			if (counter->wakeup_pending)
				need_wakeup = 1;
		}
	}

	/*
	 * In case we didn't find and reset the counter that caused
	 * the interrupt, scan all counters and reset any that are
	 * negative, to avoid getting continual interrupts.
	 * Any that we processed in the previous loop will not be negative.
	 */
	if (!found) {
		for (i = 0; i < ppmu->n_counter; ++i) {
			val = read_pmc(i + 1);
			if ((int)val < 0)
				write_pmc(i + 1, 0);
		}
	}

	/*
	 * Reset MMCR0 to its normal value.  This will set PMXE and
	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
	 * and thus allow interrupts to occur again.
	 * XXX might want to use MSR.PM to keep the counters frozen until
	 * we get back out of this interrupt.
	 */
	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);

	/*
	 * If we need a wakeup, check whether interrupts were soft-enabled
	 * when we took the interrupt.  If they were, we can wake stuff up
	 * immediately; otherwise we'll have to set a flag and do the
	 * wakeup when interrupts get soft-enabled.
	 */
	if (need_wakeup) {
		if (regs->softe) {
			irq_enter();
			perf_counter_do_pending();
			irq_exit();
		} else {
			set_perf_counter_pending(1);
		}
	}
}
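
/*
 * Called by the generic code for each CPU; counters start out frozen
 * (MMCR0_FC) until hw_perf_restore() loads a real configuration.
 */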
void hw_perf_counter_setup(int cpu)
{
	struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);

	memset(cpuhw, 0, sizeof(*cpuhw));
	cpuhw->mmcr[0] = MMCR0_FC;
}

extern struct power_pmu ppc970_pmu;
extern struct power_pmu power6_pmu;

static int init_perf_counters(void)
{
	unsigned long pvr;

	if (reserve_pmc_hardware(perf_counter_interrupt)) {
		printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
		return -EBUSY;
	}

	/* XXX should get this from cputable */
	pvr = mfspr(SPRN_PVR);
	switch (PVR_VER(pvr)) {
	case PV_970:
	case PV_970FX:
	case PV_970MP:
		ppmu = &ppc970_pmu;
		break;
	case 0x3e:	/* POWER6 */
		ppmu = &power6_pmu;
		break;
	}
	return 0;
}

arch_initcall(init_perf_counters);