perf_counter: Generic per counter interrupt throttle

author Peter Zijlstra <a.p.zijlstra@chello.nl>

Mon, 25 May 2009 15:39:05 +0000 (17:39 +0200)

committer Ingo Molnar <mingo@elte.hu>

Mon, 25 May 2009 19:41:12 +0000 (21:41 +0200)
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Mon, 25 May 2009 15:39:05 +0000 (17:39 +0200)
committer Ingo Molnar <mingo@elte.hu>
Mon, 25 May 2009 19:41:12 +0000 (21:41 +0200)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c

index 8c8177f859fe145488f8633025bc31dbf2b2cef2..c4b543d1a86fe4dbd60cb1c040250ad0eec5e8bd 100644 (file)
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -623,6 +623,18 @@ try_generic:
         return 0;
  }
  
+static void x86_pmu_unthrottle(struct perf_counter *counter)
+{
+       struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+       struct hw_perf_counter *hwc = &counter->hw;
+
+       if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
+                               cpuc->counters[hwc->idx] != counter))
+               return; /* idx out of range, or the slot holds another counter */
+       /* Checks passed: call the x86_pmu enable hook for this counter's index. */
+       x86_pmu.enable(hwc, hwc->idx);
+}
+
  void perf_counter_print_debug(void)
  {
         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
@@ -1038,6 +1050,7 @@ static const struct pmu pmu = {
         .enable         = x86_pmu_enable,
         .disable        = x86_pmu_disable,
         .read           = x86_pmu_read,
+       .unthrottle     = x86_pmu_unthrottle,
  };
  
  const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h

index 0c160be2078f19207061d467f04fdd5fa18f7f9e..e3a7585d3e43b67dbfbb6da4db92d38a7426e5db 100644 (file)
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -266,6 +266,15 @@ enum perf_event_type {
          */
         PERF_EVENT_PERIOD               = 4,
  
+       /*
+        * struct {
+        *      struct perf_event_header        header;
+        *      u64                             time;
+        * };
+        */
+       PERF_EVENT_THROTTLE             = 5,
+       PERF_EVENT_UNTHROTTLE           = 6,
+
         /*
          * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
          * will be PERF_RECORD_*
@@ -367,6 +376,7 @@ struct pmu {
         int (*enable)                   (struct perf_counter *counter);
         void (*disable)                 (struct perf_counter *counter);
         void (*read)                    (struct perf_counter *counter);
+       void (*unthrottle)              (struct perf_counter *counter);
  };
  
  /**
@@ -613,6 +623,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
  
  extern int sysctl_perf_counter_priv;
  extern int sysctl_perf_counter_mlock;
+extern int sysctl_perf_counter_limit;
  
  extern void perf_counter_init(void);
  
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c

index 14b1fe984832f33e1621c9efe4fc02ecdf7cb819..ec9c4007a7f99d4af65a0c184025654d8d1a7cd9 100644 (file)
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,6 +46,7 @@ static atomic_t nr_comm_tracking __read_mostly;
  
  int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
  int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
+int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
  
  /*
   * Lock for (sysadmin-configurable) counter reservations:
@@ -1066,12 +1067,15 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
         __perf_counter_sched_in(ctx, cpuctx, cpu);
  }
  
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_counter *counter, int enable);
  static void perf_log_period(struct perf_counter *counter, u64 period);
  
  static void perf_adjust_freq(struct perf_counter_context *ctx)
  {
         struct perf_counter *counter;
-       u64 irq_period;
+       u64 interrupts, irq_period;
         u64 events, period;
         s64 delta;
  
@@ -1080,10 +1084,19 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
                         continue;
  
+               interrupts = counter->hw.interrupts;
+               counter->hw.interrupts = 0;
+
+               if (interrupts == MAX_INTERRUPTS) {
+                       perf_log_throttle(counter, 1);
+                       counter->pmu->unthrottle(counter);
+                       interrupts = 2*sysctl_perf_counter_limit/HZ;
+               }
+
                 if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
                         continue;
  
-               events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+               events = HZ * interrupts * counter->hw.irq_period;
                 period = div64_u64(events, counter->hw_event.irq_freq);
  
                 delta = (s64)(1 + period - counter->hw.irq_period);
@@ -1097,7 +1110,6 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                 perf_log_period(counter, irq_period);
  
                 counter->hw.irq_period = irq_period;
-               counter->hw.interrupts = 0;
         }
         spin_unlock(&ctx->lock);
  }
@@ -2543,6 +2555,35 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
         perf_output_end(&handle);
  }
  
+/*
+ * Log an IRQ throttle record: UNTHROTTLE if @enable is set, else THROTTLE.
+ */
+static void perf_log_throttle(struct perf_counter *counter, int enable)
+{
+       struct perf_output_handle handle;
+       int ret;
+
+       struct {
+               struct perf_event_header        header;
+               u64                             time;
+       } throttle_event = {
+               .header = {
+                       .type = enable ? PERF_EVENT_UNTHROTTLE :
+                                        PERF_EVENT_THROTTLE,
+                       .misc = 0,
+                       .size = sizeof(throttle_event),
+               },
+               .time = sched_clock(),
+       };
+
+       ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0);
+       if (ret)
+               return;
+
+       perf_output_put(&handle, throttle_event);
+       perf_output_end(&handle);
+}
+
  /*
   * Generic counter overflow handling.
   */
@@ -2551,9 +2592,19 @@ int perf_counter_overflow(struct perf_counter *counter,
                           int nmi, struct pt_regs *regs, u64 addr)
  {
         int events = atomic_read(&counter->event_limit);
+       int throttle = counter->pmu->unthrottle != NULL;
         int ret = 0;
  
-       counter->hw.interrupts++;
+       if (!throttle) {
+               counter->hw.interrupts++;
+       } else if (counter->hw.interrupts != MAX_INTERRUPTS) {
+               counter->hw.interrupts++;
+               if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
+                       counter->hw.interrupts = MAX_INTERRUPTS;
+                       perf_log_throttle(counter, 0);
+                       ret = 1;
+               }
+       }
  
         /*
          * XXX event_limit might not quite work as expected on inherited
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 3cb1849f59895e9cb09f7b6cbc7f4371bb6bd7d5..0c4bf863afa32ffa9223e3cacafe89bd2f64c80c 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -930,6 +930,14 @@ static struct ctl_table kern_table[] = {
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
         },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "perf_counter_int_limit",
+               .data           = &sysctl_perf_counter_limit,
+               .maxlen         = sizeof(sysctl_perf_counter_limit),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
  #endif
  /*
   * NOTE: do not add new entries to this table unless you have read
author	Peter Zijlstra <a.p.zijlstra@chello.nl>
	Mon, 25 May 2009 15:39:05 +0000 (17:39 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Mon, 25 May 2009 19:41:12 +0000 (21:41 +0200)
arch/x86/kernel/cpu/perf_counter.c		patch \| blob \| history
include/linux/perf_counter.h		patch \| blob \| history
kernel/perf_counter.c		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history