Merge branch 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

[karo-tx-linux.git] / virt / kvm / kvm_main.c
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index d8db2f8fce9c7ab727fb5dfa957d5f7ba828fa42..a25a73147f714458dd6c55fe7426649f9dd5baa2 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,9 +66,18 @@
  MODULE_AUTHOR("Qumranet");
  MODULE_LICENSE("GPL");
  
-static unsigned int halt_poll_ns;
+/* halt polling only reduces halt latency by 5-7 us, 500us is enough */
+static unsigned int halt_poll_ns = 500000;
  module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
  
+/*
+ * Factor a vcpu's halt_poll_ns is multiplied by when it is grown; the
+ * default of 2 doubles it.  Registered as uint so the parameter type
+ * matches the variable and negative values are not accepted.
+ */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, uint, S_IRUGO);
+
+/*
+ * Divisor a vcpu's halt_poll_ns is divided by when it is shrunk; the
+ * default of 0 resets the per-vcpu value to 0 instead of dividing.
+ * Registered as uint so the parameter type matches the variable.
+ */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, uint, S_IRUGO);
+
  /*
   * Ordering of locks:
   *
@@ -217,6 +226,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
         vcpu->kvm = kvm;
         vcpu->vcpu_id = id;
         vcpu->pid = NULL;
+       vcpu->halt_poll_ns = 0;
         init_waitqueue_head(&vcpu->wq);
         kvm_async_pf_vcpu_init(vcpu);
  
@@ -387,6 +397,36 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
         return young;
  }
  
+/*
+ * clear_young mmu notifier hook: ages the range start..end through
+ * kvm_age_hva() while holding kvm->srcu (read side) and kvm->mmu_lock,
+ * and returns whatever kvm_age_hva() reported.  The mm argument is unused.
+ */
+static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
+                                       struct mm_struct *mm,
+                                       unsigned long start,
+                                       unsigned long end)
+{
+       struct kvm *kvm = mmu_notifier_to_kvm(mn);
+       int srcu_idx = srcu_read_lock(&kvm->srcu);
+       int was_young;
+
+       spin_lock(&kvm->mmu_lock);
+       /*
+        * No TLB flush is done here, but there is still a performance
+        * cost on pre-Haswell Intel EPT: without an EPT Access Bit to
+        * clear, the EPT tables have to be torn down instead.  Should
+        * that prove unacceptable, kvm_age_hva could take a parameter
+        * that makes it effectively a no-op for clear_young.
+        *
+        * Secondary TLB flushes are currently never issued from
+        * clear_young either; that is left to the regular system
+        * cadence.  If this turns out to be too inaccurate, a more
+        * sophisticated heuristic can be introduced later.
+        */
+       was_young = kvm_age_hva(kvm, start, end);
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+       return was_young;
+}
+
  static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long address)
@@ -419,6 +459,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
         .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
         .invalidate_range_end   = kvm_mmu_notifier_invalidate_range_end,
         .clear_flush_young      = kvm_mmu_notifier_clear_flush_young,
+       .clear_young            = kvm_mmu_notifier_clear_young,
         .test_young             = kvm_mmu_notifier_test_young,
         .change_pte             = kvm_mmu_notifier_change_pte,
         .release                = kvm_mmu_notifier_release,
@@ -1906,6 +1947,35 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
  }
  EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
  
+/*
+ * Grow vcpu->halt_poll_ns.  A value of 0 is bumped to a 10us base when
+ * halt_poll_ns_grow is non-zero; otherwise the current value is
+ * multiplied by halt_poll_ns_grow.  The result is clamped to the
+ * module-wide halt_poll_ns, which kvm_vcpu_block() already uses as the
+ * threshold below which growing is allowed, so repeated growth cannot
+ * overshoot it.
+ *
+ * NOTE(review): locals are unsigned on the assumption that
+ * vcpu->halt_poll_ns is an unsigned int - confirm against struct kvm_vcpu.
+ */
+static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+       unsigned int old, val;
+       u64 grown;
+
+       old = val = vcpu->halt_poll_ns;
+       /* 10us base; otherwise multiply in 64 bits so the product cannot wrap */
+       if (val == 0 && halt_poll_ns_grow)
+               grown = 10000;
+       else
+               grown = (u64)val * halt_poll_ns_grow;
+
+       /* never poll longer than the module-wide limit */
+       if (grown > halt_poll_ns)
+               grown = halt_poll_ns;
+
+       val = grown;
+       vcpu->halt_poll_ns = val;
+       trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
+}
+
+/*
+ * Shrink vcpu->halt_poll_ns: divide it by halt_poll_ns_shrink, or reset
+ * it to 0 when halt_poll_ns_shrink is 0.  The old and new values are
+ * reported through the kvm_halt_poll_ns_shrink tracepoint.
+ */
+static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+       int prev = vcpu->halt_poll_ns;
+       int next = prev;
+
+       /* a zero divisor means "reset" rather than "divide" */
+       if (halt_poll_ns_shrink != 0)
+               next /= halt_poll_ns_shrink;
+       else
+               next = 0;
+
+       vcpu->halt_poll_ns = next;
+       trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, next, prev);
+}
+
  static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
  {
         if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -1928,10 +1998,11 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
         ktime_t start, cur;
         DEFINE_WAIT(wait);
         bool waited = false;
+       u64 block_ns;
  
         start = cur = ktime_get();
-       if (halt_poll_ns) {
-               ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+       if (vcpu->halt_poll_ns) {
+               ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
  
                 do {
                         /*
@@ -1960,7 +2031,21 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
         cur = ktime_get();
  
  out:
-       trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
+       block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+
+       if (halt_poll_ns) {
+               if (block_ns <= vcpu->halt_poll_ns)
+                       ;
+               /* we had a long block, shrink polling */
+               else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+                       shrink_halt_poll_ns(vcpu);
+               /* we had a short halt and our poll time is too small */
+               else if (vcpu->halt_poll_ns < halt_poll_ns &&
+                       block_ns < halt_poll_ns)
+                       grow_halt_poll_ns(vcpu);
+       }
+
+       trace_kvm_vcpu_wakeup(block_ns, waited);
  }
  EXPORT_SYMBOL_GPL(kvm_vcpu_block);